mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 18:38:55 +00:00
Micro-optimize sourcemaps (#17757)
Co-authored-by: chloe caruso <git@paperclover.net>
This commit is contained in:
@@ -1,8 +1,3 @@
|
||||
/// Number of payload bits carried by one base64 VLQ digit.
pub const VLQ_BASE_SHIFT: u32 = 5;
/// One past the largest payload a single digit can carry (0b100000).
pub const VLQ_BASE: u32 = 1 << VLQ_BASE_SHIFT;
/// Selects the five payload bits of a digit (0b011111).
pub const VLQ_BASE_MASK: u32 = VLQ_BASE - 1;
/// The continuation flag of a digit, expressed as a bit value (0b100000),
/// not as a bit index.
pub const VLQ_CONTINUATION_BIT: u32 = VLQ_BASE;
/// Selects the continuation flag of a digit. `VLQ_CONTINUATION_BIT` is already
/// a bit value, so it is used as the mask directly: shifting `1` left by it
/// (that is, by 32) produces a value that does not fit in a `u32`.
pub const VLQ_CONTINUATION_MASK: u32 = VLQ_CONTINUATION_BIT;
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
const string = bun.string;
|
||||
@@ -701,13 +696,13 @@ pub const ParsedSourceMap = struct {
|
||||
} else if (i != 0) {
|
||||
try writer.writeByte(',');
|
||||
}
|
||||
try encodeVLQ(gen.columns - last_col).writeTo(writer);
|
||||
try VLQ.encode(gen.columns - last_col).writeTo(writer);
|
||||
last_col = gen.columns;
|
||||
try encodeVLQ(source_index - last_src).writeTo(writer);
|
||||
try VLQ.encode(source_index - last_src).writeTo(writer);
|
||||
last_src = source_index;
|
||||
try encodeVLQ(orig.lines - last_ol).writeTo(writer);
|
||||
try VLQ.encode(orig.lines - last_ol).writeTo(writer);
|
||||
last_ol = orig.lines;
|
||||
try encodeVLQ(orig.columns - last_oc).writeTo(writer);
|
||||
try VLQ.encode(orig.columns - last_oc).writeTo(writer);
|
||||
last_oc = orig.columns;
|
||||
}
|
||||
}
|
||||
@@ -1066,8 +1061,8 @@ pub const SourceMapPieces = struct {
|
||||
|
||||
const shift_column_delta = shift.after.columns - shift.before.columns;
|
||||
const vlq_value = decode_result.value + shift_column_delta - prev_shift_column_delta;
|
||||
const encode = encodeVLQ(vlq_value);
|
||||
j.pushCloned(encode.bytes[0..encode.len]);
|
||||
const encode = VLQ.encode(vlq_value);
|
||||
j.pushCloned(encode.slice());
|
||||
prev_shift_column_delta = shift_column_delta;
|
||||
|
||||
start_of_run = potential_start_of_run;
|
||||
@@ -1147,393 +1142,6 @@ pub fn appendSourceMapChunk(j: *StringJoiner, allocator: std.mem.Allocator, prev
|
||||
j.pushStatic(source_map);
|
||||
}
|
||||
|
||||
/// Encodings of the values 0 through 255, indexed by value and produced with
/// `encodeVLQ` while the table initializer is evaluated.
const vlq_lookup_table: [256]VLQ = brk: {
    var table: [256]VLQ = undefined;
    for (&table, 0..) |*slot, value| {
        slot.* = encodeVLQ(@as(i32, @intCast(value)));
    }
    break :brk table;
};
|
||||
|
||||
/// Source map VLQ values are limited to i32.
/// The longest encodings, "//////D" and "+/////D", are 7 bytes long, which is
/// what sizes the `bytes` buffer of `VLQ`.
const vlq_max_in_bytes = 7;

/// One encoded VLQ: a fixed-size buffer plus the number of bytes in use.
pub const VLQ = struct {
    /// Encoded base64 digits; only the first `len` bytes are meaningful.
    bytes: [vlq_max_in_bytes]u8,
    /// Number of valid bytes at the front of `bytes`.
    len: u4 = 0,

    /// Returns the valid prefix of `bytes`. The slice points into `self`.
    pub fn slice(self: *const VLQ) []const u8 {
        const used: usize = self.len;
        return self.bytes[0..used];
    }

    /// Writes the encoded digits to `writer` with a single `writeAll` call and
    /// forwards any error it reports.
    pub fn writeTo(self: VLQ, writer: anytype) !void {
        return writer.writeAll(self.slice());
    }
};
|
||||
|
||||
/// Encodes `value`, taking the result from `vlq_lookup_table` when `value` is
/// in 0...255 and calling `encodeVLQ` for everything else.
pub fn encodeVLQWithLookupTable(value: i32) VLQ {
    return switch (value) {
        0...255 => vlq_lookup_table[@as(usize, @intCast(value))],
        else => encodeVLQ(value),
    };
}
|
||||
|
||||
// A single base 64 digit can contain 6 bits of data. For the base 64 variable
// length quantities we use in the source map spec, the first bit is the sign,
// the next four bits are the actual value, and the 6th bit is the continuation
// bit. The continuation bit tells us whether there are more digits in this
// value following this digit.
//
//   Continuation
//   |    Sign
//   |    |
//   V    V
//   101011
//
/// Encodes `value` as a base64 VLQ.
///
/// The sign is stored in the lowest bit and the magnitude in the bits above
/// it; the result is emitted five bits at a time, least significant group
/// first, with bit 6 of each digit set while more digits follow.
///
/// Every `i32` is accepted. The magnitude of `minInt(i32)` does not fit in the
/// 31 bits above the sign bit, so its top bit is dropped and the value encodes
/// as "B" (sign bit set, magnitude zero).
pub fn encodeVLQ(value: i32) VLQ {
    var bytes: [vlq_max_in_bytes]u8 = undefined;
    var len: u4 = 0;

    // Take the magnitude in unsigned, wrapping arithmetic: negating an i32
    // overflows for minInt(i32).
    const bits: u32 = @bitCast(value);
    const magnitude: u32 = if (value < 0) 0 -% bits else bits;
    var vlq: u32 = (magnitude << 1) | @as(u32, @intFromBool(value < 0));

    // source mappings are limited to i32
    comptime var i: usize = 0;
    inline while (i < vlq_max_in_bytes) : (i += 1) {
        var digit = vlq & 31;
        vlq >>= 5;

        // If there are still more digits in this value, we must make sure the
        // continuation bit is marked
        if (vlq != 0) {
            digit |= 32;
        }

        bytes[len] = base64[digit];
        len += 1;

        if (vlq == 0) {
            return .{ .bytes = bytes, .len = len };
        }
    }

    // Seven 5-bit groups cover all 32 bits of `vlq`, so it is zero by the time
    // the last iteration finishes and the loop has already returned.
    unreachable;
}
|
||||
|
||||
/// What `decodeVLQ` and `decodeVLQAssumeValid` return for one decoded value.
pub const VLQResult = struct {
|
||||
/// The decoded signed value.
value: i32 = 0,
|
||||
/// Index into the input at which decoding stopped: the decoders set it to the
/// index just past the last digit they consumed.
start: usize = 0,
|
||||
};
|
||||
|
||||
/// The base64 alphabet; the value of a digit is its index in this string.
const base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Reverse of `base64`: maps a 7-bit character code to its digit value, or to
/// `maxInt(u7)` for characters that are not in the alphabet.
///
/// The table has one entry for every possible `u7` index (128 entries), so
/// indexing it with any `u7` is in bounds.
const base64_lut: [std.math.maxInt(u7) + 1]u7 = brk: {
    @setEvalBranchQuota(9999);
    var bytes = [_]u7{std.math.maxInt(u7)} ** (std.math.maxInt(u7) + 1);

    for (base64, 0..) |c, i| {
        bytes[c] = @intCast(i);
    }

    break :brk bytes;
};
|
||||
|
||||
/// Decodes one base64 VLQ from `encoded`, beginning at byte index `start`.
///
/// On success the result holds the decoded value and `start` is the index just
/// past the last digit consumed. If the input ends, or `vlq_max_in_bytes + 1`
/// digits have been read, before a digit without the continuation bit is
/// seen, the result holds value 0 and `start` is advanced past the bytes that
/// were examined. When `start` is at or past the end of `encoded`, nothing is
/// read and `start` is returned unchanged with value 0.
///
/// The input is not validated: the high bit of every byte is discarded before
/// the table lookup, and characters outside the base64 alphabet are decoded
/// from the table's `maxInt(u7)` sentinel, so malformed input yields a
/// meaningless value rather than an error.
pub fn decodeVLQ(encoded: []const u8, start: usize) VLQResult {
    // Nothing to decode; also keeps `encoded.len - start` from underflowing.
    if (start >= encoded.len) {
        return VLQResult{ .start = start, .value = 0 };
    }

    var vlq: u32 = 0;

    // hint to the compiler what the maximum value is
    const encoded_ = encoded[start..][0..@min(encoded.len - start, comptime (vlq_max_in_bytes + 1))];

    // inlining helps for the 1 or 2 byte case, hurts a little for larger
    comptime var i: usize = 0;
    inline while (i < vlq_max_in_bytes + 1) : (i += 1) {
        // The input may end in the middle of a value.
        if (i >= encoded_.len) {
            return VLQResult{ .start = start + encoded_.len, .value = 0 };
        }

        // Codes the table has no entry for are treated like any other
        // non-base64 character.
        const code = @as(u7, @truncate(encoded_[i]));
        const index: u32 = if (code < base64_lut.len) base64_lut[code] else std.math.maxInt(u7);

        // decode a byte
        // A digit whose position starts at or beyond bit 32 carries nothing a
        // 32-bit value can hold; skipping it keeps it away from the low bits.
        const shift = i * 5;
        if (shift < 32) {
            vlq |= (index & 31) << @as(u5, @intCast(shift));
        }

        // Stop if there's no continuation bit
        if ((index & 32) == 0) {
            const magnitude = @as(i32, @intCast(vlq >> 1));
            return VLQResult{
                .start = start + comptime (i + 1),
                .value = if ((vlq & 1) == 0) magnitude else -magnitude,
            };
        }
    }

    return VLQResult{ .start = start + encoded_.len, .value = 0 };
}
|
||||
|
||||
/// Decodes one base64 VLQ from `encoded`, beginning at byte index `start`, and
/// checks with `bun.assert` that every byte it reads is a base64 character.
///
/// The result holds the decoded value and the index just past the last digit
/// consumed. If `vlq_max_in_bytes + 1` digits all carry the continuation bit,
/// the result holds value 0 and the index just past those digits.
pub fn decodeVLQAssumeValid(encoded: []const u8, start: usize) VLQResult {
    const max_digits = comptime (vlq_max_in_bytes + 1);

    // hint to the compiler what the maximum value is
    const window = encoded[start..][0..@min(encoded.len - start, max_digits)];

    var accumulated: u32 = 0;

    // inlining helps for the 1 or 2 byte case, hurts a little for larger
    comptime var digit_index: usize = 0;
    inline while (digit_index < max_digits) : (digit_index += 1) {
        const byte = window[digit_index];
        bun.assert(byte < std.math.maxInt(u7)); // invalid base64 character
        const digit: u32 = base64_lut[@as(u7, @truncate(byte))];
        bun.assert(digit != std.math.maxInt(u7)); // invalid base64 character

        // Five bits per digit; the bit position is reduced to a u5 exactly as
        // a running counter truncated to u5 would be.
        const shift = comptime @as(u5, @truncate(digit_index * 5));
        accumulated |= (digit & 31) << shift;

        // Stop if there's no continuation bit
        if ((digit & 32) == 0) {
            const magnitude: i32 = @intCast(accumulated >> 1);
            return VLQResult{
                .start = start + comptime (digit_index + 1),
                .value = if ((accumulated & 1) != 0) -magnitude else magnitude,
            };
        }
    }

    return VLQResult{ .start = start + window.len, .value = 0 };
}
|
||||
|
||||
/// One entry per source line, as built by `generate`: where the line starts
/// in the file and, for lines containing non-ASCII text, the column recorded
/// for each byte from the first non-ASCII code point onward.
pub const LineOffsetTable = struct {
|
||||
/// The source map specification is very loose and does not specify what
|
||||
/// column numbers actually mean. The popular "source-map" library from Mozilla
|
||||
/// appears to interpret them as counts of UTF-16 code units, so we generate
|
||||
/// those too for compatibility.
|
||||
///
|
||||
/// We keep mapping tables around to accelerate conversion from byte offsets
|
||||
/// to UTF-16 code unit counts. However, this mapping takes up a lot of
|
||||
/// memory. Since most JavaScript is ASCII and the
|
||||
/// mapping for ASCII is 1:1, we avoid creating a table for ASCII-only lines
|
||||
/// as an optimization.
|
||||
///
|
||||
columns_for_non_ascii: BabyList(i32) = .{},
|
||||
/// Byte offset, relative to the start of the line, of the first code point
/// above 0x7F; `generate` leaves it 0 for lines that have none.
byte_offset_to_first_non_ascii: u32 = 0,
|
||||
/// Byte offset of the first byte of the line from the start of the contents.
byte_offset_to_start_of_line: u32 = 0,
|
||||
|
||||
pub const List = std.MultiArrayList(LineOffsetTable);
|
||||
|
||||
/// Binary-searches `byte_offsets_to_start_of_line` and returns the index of
/// the last entry that is <= `loc.start`, or -1 when every entry is greater.
/// Assumes the offsets are sorted ascending (the search relies on it).
pub fn findLine(byte_offsets_to_start_of_line: []const u32, loc: Logger.Loc) i32 {
|
||||
assert(loc.start > -1); // checked by caller
|
||||
var original_line: usize = 0;
|
||||
const loc_start = @as(usize, @intCast(loc.start));
|
||||
|
||||
{
|
||||
var count = @as(usize, @truncate(byte_offsets_to_start_of_line.len));
|
||||
var i: usize = 0;
|
||||
while (count > 0) {
|
||||
const step = count / 2;
|
||||
i = original_line + step;
|
||||
if (byte_offsets_to_start_of_line[i] <= loc_start) {
|
||||
original_line = i + 1;
|
||||
count = count - step - 1;
|
||||
} else {
|
||||
count = step;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// original_line is now the number of entries <= loc_start
return @as(i32, @intCast(original_line)) - 1;
|
||||
}
|
||||
|
||||
/// Binary-searches for the entry `i` whose offset equals `loc.start`, or for
/// which `loc.start` lies strictly between entry `i` and entry `i + 1`.
/// Returns null when no probed entry satisfies either test.
/// NOTE(review): with sorted offsets, a `loc.start` beyond the last entry has
/// no following entry to bound it and therefore yields null — confirm that
/// callers expect this.
pub fn findIndex(byte_offsets_to_start_of_line: []const u32, loc: Logger.Loc) ?usize {
|
||||
assert(loc.start > -1); // checked by caller
|
||||
var original_line: usize = 0;
|
||||
const loc_start = @as(usize, @intCast(loc.start));
|
||||
|
||||
var count = @as(usize, @truncate(byte_offsets_to_start_of_line.len));
|
||||
var i: usize = 0;
|
||||
while (count > 0) {
|
||||
const step = count / 2;
|
||||
i = original_line + step;
|
||||
const byte_offset = byte_offsets_to_start_of_line[i];
|
||||
if (byte_offset == loc_start) {
|
||||
return i;
|
||||
}
|
||||
if (i + 1 < byte_offsets_to_start_of_line.len) {
|
||||
const next_byte_offset = byte_offsets_to_start_of_line[i + 1];
|
||||
if (byte_offset < loc_start and loc_start < next_byte_offset) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
if (byte_offset < loc_start) {
|
||||
original_line = i + 1;
|
||||
count = count - step - 1;
|
||||
} else {
|
||||
count = step;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Builds the table for `contents`, appending one entry each time a line
/// terminator ('\r', '\n', U+2028, U+2029) is reached and one final entry
/// after the loop. Columns are counted in UTF-16 code units.
/// `approximate_line_count` only sizes the initial allocation.
/// Allocation failures are not reported: every allocating call is followed by
/// `catch unreachable`.
pub fn generate(allocator: std.mem.Allocator, contents: []const u8, approximate_line_count: i32) List {
|
||||
var list = List{};
|
||||
// Preallocate the top-level table using the approximate line count from the lexer
|
||||
list.ensureUnusedCapacity(allocator, @as(usize, @intCast(@max(approximate_line_count, 1)))) catch unreachable;
|
||||
// column of the current code point within its line, in UTF-16 code units
var column: i32 = 0;
|
||||
var byte_offset_to_first_non_ascii: u32 = 0;
|
||||
// next line-relative byte offset that still needs a column recorded
var column_byte_offset: u32 = 0;
|
||||
var line_byte_offset: u32 = 0;
|
||||
|
||||
// the idea here is:
|
||||
// we want to avoid re-allocating this array _most_ of the time
|
||||
// when lines _do_ have unicode characters, they probably still won't be longer than 255 much
|
||||
var stack_fallback = std.heap.stackFallback(@sizeOf(i32) * 256, allocator);
|
||||
var columns_for_non_ascii = std.ArrayList(i32).initCapacity(stack_fallback.get(), 120) catch unreachable;
|
||||
// saved so the stack buffer and the empty list can be restored after each line
const reset_end_index = stack_fallback.fixed_buffer_allocator.end_index;
|
||||
const initial_columns_for_non_ascii = columns_for_non_ascii;
|
||||
|
||||
var remaining = contents;
|
||||
while (remaining.len > 0) {
|
||||
const len_ = strings.wtf8ByteSequenceLengthWithInvalid(remaining[0]);
|
||||
// NOTE(review): this hands a 4-byte view to the decoder even when fewer
// than 4 bytes remain — confirm the helper reads at most `len_` bytes.
const c = strings.decodeWTF8RuneT(remaining.ptr[0..4], len_, i32, 0);
|
||||
const cp_len = @as(usize, len_);
|
||||
|
||||
// column 0 means a new line begins at this byte
if (column == 0) {
|
||||
line_byte_offset = @as(
|
||||
u32,
|
||||
@truncate(@intFromPtr(remaining.ptr) - @intFromPtr(contents.ptr)),
|
||||
);
|
||||
}
|
||||
|
||||
// first non-ASCII code point of this line (the list is still empty)
if (c > 0x7F and columns_for_non_ascii.items.len == 0) {
|
||||
assert(@intFromPtr(
|
||||
remaining.ptr,
|
||||
) >= @intFromPtr(
|
||||
contents.ptr,
|
||||
));
|
||||
// we have a non-ASCII character, so we need to keep track of the
|
||||
// mapping from byte offsets to UTF-16 code unit counts
|
||||
columns_for_non_ascii.appendAssumeCapacity(column);
|
||||
column_byte_offset = @as(
|
||||
u32,
|
||||
@intCast((@intFromPtr(
|
||||
remaining.ptr,
|
||||
) - @intFromPtr(
|
||||
contents.ptr,
|
||||
)) - line_byte_offset),
|
||||
);
|
||||
byte_offset_to_first_non_ascii = column_byte_offset;
|
||||
}
|
||||
|
||||
// Update the per-byte column offsets
|
||||
if (columns_for_non_ascii.items.len > 0) {
|
||||
const line_bytes_so_far = @as(u32, @intCast(@as(
|
||||
u32,
|
||||
@truncate(@intFromPtr(remaining.ptr) - @intFromPtr(contents.ptr)),
|
||||
))) - line_byte_offset;
|
||||
columns_for_non_ascii.ensureUnusedCapacity((line_bytes_so_far - column_byte_offset) + 1) catch unreachable;
|
||||
while (column_byte_offset <= line_bytes_so_far) : (column_byte_offset += 1) {
|
||||
columns_for_non_ascii.appendAssumeCapacity(column);
|
||||
}
|
||||
} else {
|
||||
// no non-ASCII seen on this line yet: ASCII above '\r' takes the fast path
switch (c) {
|
||||
(@max('\r', '\n') + 1)...127 => {
|
||||
// skip ahead to the next newline or non-ascii character
|
||||
if (strings.indexOfNewlineOrNonASCIICheckStart(remaining, @as(u32, len_), false)) |j| {
|
||||
column += @as(i32, @intCast(j));
|
||||
remaining = remaining[j..];
|
||||
} else {
|
||||
// if there are no more lines, we are done!
|
||||
column += @as(i32, @intCast(remaining.len));
|
||||
remaining = remaining[remaining.len..];
|
||||
}
|
||||
|
||||
continue;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
'\r', '\n', 0x2028, 0x2029 => {
|
||||
// windows newline
|
||||
// step over the '\r' only; the '\n' that follows ends the line on the next pass
if (c == '\r' and remaining.len > 1 and remaining[1] == '\n') {
|
||||
column += 1;
|
||||
remaining = remaining[1..];
|
||||
continue;
|
||||
}
|
||||
|
||||
// We don't call .toOwnedSlice() because it is expensive to
|
||||
// reallocate the array AND when inside an Arena, it's
|
||||
// hideously expensive
|
||||
var owned = columns_for_non_ascii.items;
|
||||
// items living in the stack buffer are copied out before the buffer is reused
if (stack_fallback.fixed_buffer_allocator.ownsSlice(std.mem.sliceAsBytes(owned))) {
|
||||
owned = allocator.dupe(i32, owned) catch unreachable;
|
||||
}
|
||||
|
||||
list.append(allocator, .{
|
||||
.byte_offset_to_start_of_line = line_byte_offset,
|
||||
.byte_offset_to_first_non_ascii = byte_offset_to_first_non_ascii,
|
||||
.columns_for_non_ascii = BabyList(i32).init(owned),
|
||||
}) catch unreachable;
|
||||
|
||||
// start the per-line state over for the next line
column = 0;
|
||||
byte_offset_to_first_non_ascii = 0;
|
||||
column_byte_offset = 0;
|
||||
line_byte_offset = 0;
|
||||
|
||||
// reset the list to use the stack-allocated memory
|
||||
stack_fallback.fixed_buffer_allocator.reset();
|
||||
stack_fallback.fixed_buffer_allocator.end_index = reset_end_index;
|
||||
columns_for_non_ascii = initial_columns_for_non_ascii;
|
||||
},
|
||||
else => {
|
||||
// Mozilla's "source-map" library counts columns using UTF-16 code units
|
||||
column += @as(i32, @intFromBool(c > 0xFFFF)) + 1;
|
||||
},
|
||||
}
|
||||
|
||||
remaining = remaining[cp_len..];
|
||||
}
|
||||
|
||||
// Mark the start of the next line
|
||||
if (column == 0) {
|
||||
line_byte_offset = @as(u32, @intCast(contents.len));
|
||||
}
|
||||
|
||||
// record columns for the bytes of the final line that the loop did not cover
if (columns_for_non_ascii.items.len > 0) {
|
||||
const line_bytes_so_far = @as(u32, @intCast(contents.len)) - line_byte_offset;
|
||||
columns_for_non_ascii.ensureUnusedCapacity((line_bytes_so_far - column_byte_offset) + 1) catch unreachable;
|
||||
while (column_byte_offset <= line_bytes_so_far) : (column_byte_offset += 1) {
|
||||
columns_for_non_ascii.appendAssumeCapacity(column);
|
||||
}
|
||||
}
|
||||
// append the entry for the final line
{
|
||||
var owned = columns_for_non_ascii.toOwnedSlice() catch unreachable;
|
||||
if (stack_fallback.fixed_buffer_allocator.ownsSlice(std.mem.sliceAsBytes(owned))) {
|
||||
owned = allocator.dupe(i32, owned) catch unreachable;
|
||||
}
|
||||
list.append(allocator, .{
|
||||
.byte_offset_to_start_of_line = line_byte_offset,
|
||||
.byte_offset_to_first_non_ascii = byte_offset_to_first_non_ascii,
|
||||
.columns_for_non_ascii = BabyList(i32).init(owned),
|
||||
}) catch unreachable;
|
||||
}
|
||||
|
||||
// give back capacity the line-count estimate over-reserved
if (list.capacity > list.len) {
|
||||
list.shrinkAndFree(allocator, list.len);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
};
|
||||
|
||||
pub fn appendSourceMappingURLRemote(
|
||||
origin: URL,
|
||||
source: Logger.Source,
|
||||
@@ -1552,35 +1160,40 @@ pub fn appendSourceMappingURLRemote(
|
||||
try writer.writeAll(".map");
|
||||
}
|
||||
|
||||
/// This function is extremely hot.
|
||||
pub fn appendMappingToBuffer(buffer_: MutableString, last_byte: u8, prev_state: SourceMapState, current_state: SourceMapState) MutableString {
|
||||
var buffer = buffer_;
|
||||
const needs_comma = last_byte != 0 and last_byte != ';' and last_byte != '"';
|
||||
|
||||
const vlq = [_]VLQ{
|
||||
const vlqs = [_]VLQ{
|
||||
// Record the generated column (the line is recorded using ';' elsewhere)
|
||||
encodeVLQWithLookupTable(current_state.generated_column -| prev_state.generated_column),
|
||||
.encode(current_state.generated_column -| prev_state.generated_column),
|
||||
// Record the generated source
|
||||
encodeVLQWithLookupTable(current_state.source_index -| prev_state.source_index),
|
||||
.encode(current_state.source_index -| prev_state.source_index),
|
||||
// Record the original line
|
||||
encodeVLQWithLookupTable(current_state.original_line -| prev_state.original_line),
|
||||
.encode(current_state.original_line -| prev_state.original_line),
|
||||
// Record the original column
|
||||
encodeVLQWithLookupTable(current_state.original_column -| prev_state.original_column),
|
||||
.encode(current_state.original_column -| prev_state.original_column),
|
||||
};
|
||||
|
||||
// Count exactly how many bytes we need to write
|
||||
const total_len = @as(u32, vlq[0].len) +
|
||||
@as(u32, vlq[1].len) +
|
||||
@as(u32, vlq[2].len) +
|
||||
@as(u32, vlq[3].len);
|
||||
buffer.growIfNeeded(total_len + @as(u32, @intFromBool(needs_comma))) catch unreachable;
|
||||
const total_len = @as(usize, vlqs[0].len) +
|
||||
@as(usize, vlqs[1].len) +
|
||||
@as(usize, vlqs[2].len) +
|
||||
@as(usize, vlqs[3].len);
|
||||
|
||||
// Instead of updating .len 5 times, we only need to update it once.
|
||||
var writable = buffer.writableNBytes(total_len + @as(usize, @intFromBool(needs_comma))) catch unreachable;
|
||||
|
||||
// Put commas in between mappings
|
||||
if (needs_comma) {
|
||||
buffer.appendCharAssumeCapacity(',');
|
||||
writable[0] = ',';
|
||||
writable = writable[1..];
|
||||
}
|
||||
|
||||
inline for (vlq) |item| {
|
||||
buffer.appendAssumeCapacity(item.bytes[0..item.len]);
|
||||
inline for (&vlqs) |item| {
|
||||
@memcpy(writable[0..item.len], item.slice());
|
||||
writable = writable[item.len..];
|
||||
}
|
||||
|
||||
return buffer;
|
||||
@@ -1967,3 +1580,8 @@ pub const DebugIDFormatter = struct {
|
||||
const assert = bun.assert;
|
||||
|
||||
pub const coverage = @import("./CodeCoverage.zig");
|
||||
pub const VLQ = @import("./VLQ.zig");
|
||||
pub const LineOffsetTable = @import("./LineOffsetTable.zig");
|
||||
|
||||
const decodeVLQAssumeValid = VLQ.decodeAssumeValid;
|
||||
const decodeVLQ = VLQ.decode;
|
||||
|
||||
Reference in New Issue
Block a user