mirror of
https://github.com/oven-sh/bun
synced 2026-02-10 19:08:50 +00:00
faster Buffer.byteLength("latin1")
This commit is contained in:
@@ -1729,48 +1729,40 @@ pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize {
|
||||
|
||||
const latin1_last = latin1.ptr + latin1.len;
|
||||
if (latin1.ptr != latin1_last) {
|
||||
const wrapped_len = latin1.len - (latin1.len % ascii_vector_size);
|
||||
|
||||
// reference the pointer directly because it improves codegen
|
||||
var ptr = latin1.ptr;
|
||||
const latin1_vec_end = ptr + wrapped_len;
|
||||
|
||||
while (ptr != latin1_vec_end) {
|
||||
const vec: AsciiVector = ptr[0..ascii_vector_size].*;
|
||||
|
||||
if (@reduce(.Max, vec) > 127) {
|
||||
const Int = u64;
|
||||
const size = @sizeOf(Int);
|
||||
|
||||
const bytes = [2]Int{
|
||||
@bitCast(Int, ptr[0..size].*) & 0x8080808080808080,
|
||||
@bitCast(Int, ptr[size .. 2 * size].*) & 0x8080808080808080,
|
||||
};
|
||||
|
||||
total_non_ascii_count += @popCount(bytes[0]) + @popCount(bytes[1]);
|
||||
if (comptime Environment.enableSIMD) {
|
||||
const wrapped_len = latin1.len - (latin1.len % ascii_vector_size);
|
||||
const latin1_vec_end = ptr + wrapped_len;
|
||||
while (ptr != latin1_vec_end) {
|
||||
const vec: AsciiVector = ptr[0..ascii_vector_size].*;
|
||||
const cmp = vec & @splat(ascii_vector_size, @as(u8, 0x80));
|
||||
total_non_ascii_count += @reduce(.Add, cmp);
|
||||
ptr += ascii_vector_size;
|
||||
}
|
||||
} else {
|
||||
while (@ptrToInt(ptr + 8) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u64, ptr[0..8].*) & 0x8080808080808080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 8;
|
||||
}
|
||||
|
||||
ptr += ascii_vector_size;
|
||||
}
|
||||
if (@ptrToInt(ptr + 4) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u32, ptr[0..4].*) & 0x80808080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
if (@ptrToInt(ptr + 8) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u64, ptr[0..8].*) & 0x8080808080808080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 8;
|
||||
}
|
||||
|
||||
if (@ptrToInt(ptr + 4) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u32, ptr[0..4].*) & 0x80808080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
if (@ptrToInt(ptr + 2) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u16, ptr[0..2].*) & 0x8080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 2;
|
||||
if (@ptrToInt(ptr + 2) < @ptrToInt(latin1_last)) {
|
||||
if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr));
|
||||
const bytes = @bitCast(u16, ptr[0..2].*) & 0x8080;
|
||||
total_non_ascii_count += @popCount(bytes);
|
||||
ptr += 2;
|
||||
}
|
||||
}
|
||||
|
||||
while (ptr != latin1_last) {
|
||||
|
||||
Reference in New Issue
Block a user