mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
Use Highway SIMD (#19134)
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com> Co-authored-by: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> Co-authored-by: Jarred-Sumner <709451+Jarred-Sumner@users.noreply.github.com>
This commit is contained in:
@@ -1089,6 +1089,7 @@ set(BUN_DEPENDENCIES
|
||||
BoringSSL
|
||||
Brotli
|
||||
Cares
|
||||
Highway
|
||||
LibDeflate
|
||||
LolHtml
|
||||
Lshpack
|
||||
|
||||
33
cmake/targets/BuildHighway.cmake
Normal file
33
cmake/targets/BuildHighway.cmake
Normal file
@@ -0,0 +1,33 @@
|
||||
# Fetch Google Highway (portable SIMD library) pinned to a fixed commit so
# builds are reproducible.
register_repository(
  NAME
    highway
  REPOSITORY
    google/highway
  COMMIT
    12b325bc1793dee68ab2157995a690db859fe9e0
)

set(HIGHWAY_CMAKE_ARGS
  # Build a static library
  -DBUILD_SHARED_LIBS=OFF
  # Enable position-independent code for linking into the main executable
  -DCMAKE_POSITION_INDEPENDENT_CODE=ON
  # Disable unnecessary components
  # NOTE(review): HWY_ENABLE_CONTRIB=OFF only disables *building* contrib;
  # the contrib headers (e.g. hwy/contrib/algo/find-inl.h used by
  # highway_strings.cpp) are header-only and still available via the
  # source include dir below — confirm this stays true on upgrades.
  -DHWY_ENABLE_TESTS=OFF
  -DHWY_ENABLE_EXAMPLES=OFF
  -DHWY_ENABLE_CONTRIB=OFF
  # Disable building of the install target
  -DHWY_ENABLE_INSTALL=OFF
)

# Build the `hwy` static library and expose its headers (source root and
# the hwy/ subdirectory) to the rest of the build.
register_cmake_command(
  TARGET
    highway
  LIBRARIES
    hwy
  ARGS
    ${HIGHWAY_CMAKE_ARGS}
  INCLUDES
    .
    hwy
)
|
||||
@@ -367,4 +367,10 @@ SIMDUTFResult simdutf__base64_decode_from_binary16(const char16_t* input, size_t
|
||||
|
||||
return { .error = res.error, .count = res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__utf16_length_from_latin1(const char* input, size_t length)
|
||||
{
|
||||
UNUSED_PARAM(input);
|
||||
return simdutf::utf16_length_from_latin1(length);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,6 +95,7 @@ pub extern fn simdutf__utf8_length_from_utf32(input: [*c]const c_uint, length: u
|
||||
pub extern fn simdutf__utf16_length_from_utf32(input: [*c]const c_uint, length: usize) usize;
|
||||
pub extern fn simdutf__utf32_length_from_utf8(input: [*]const u8, length: usize) usize;
|
||||
pub extern fn simdutf__utf8_length_from_latin1(input: [*]const u8, length: usize) usize;
|
||||
pub extern fn simdutf__utf16_length_from_latin1(input: [*]const u8, length: usize) usize;
|
||||
|
||||
pub const validate = struct {
|
||||
pub const with_errors = struct {
|
||||
@@ -295,6 +296,10 @@ pub const length = struct {
|
||||
JSC.markBinding(@src());
|
||||
return simdutf__utf16_length_from_utf32(input.ptr, input.len);
|
||||
}
|
||||
|
||||
pub fn latin1(input: []const u8) usize {
|
||||
return simdutf__utf16_length_from_latin1(input.ptr, input.len);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
790
src/bun.js/bindings/highway_strings.cpp
Normal file
790
src/bun.js/bindings/highway_strings.cpp
Normal file
@@ -0,0 +1,790 @@
|
||||
// Must be first
|
||||
#include "root.h"
|
||||
#undef HWY_TARGET_INCLUDE
|
||||
// Correct path to this file relative to the build root (CMakeLists.txt)
|
||||
#define HWY_TARGET_INCLUDE "highway_strings.cpp"
|
||||
#include <hwy/foreach_target.h> // Must come before highway.h
|
||||
|
||||
// Now include Highway and other headers
|
||||
#include <hwy/highway.h>
|
||||
#include <hwy/aligned_allocator.h>
|
||||
|
||||
#include <hwy/contrib/algo/find-inl.h>
|
||||
|
||||
#include <cstring> // For memcmp
|
||||
#include <algorithm> // For std::min, std::max
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Wrap the SIMD implementations in the Highway namespaces
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
namespace bun {
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
namespace hn = hwy::HWY_NAMESPACE; // Alias for convenience
|
||||
|
||||
// Type alias for SIMD vector tag
|
||||
using D8 = hn::ScalableTag<uint8_t>;
|
||||
|
||||
// Returns the index of the first occurrence of `needle` in `haystack`,
// or `haystack_len` when the byte is not present (the caller's "not found"
// sentinel).
size_t IndexOfCharImpl(const uint8_t* HWY_RESTRICT haystack, size_t haystack_len,
    uint8_t needle)
{
    D8 d;
    // Use the Find function from find-inl.h which handles both vectorized and scalar cases
    const size_t pos = hn::Find<D8>(d, needle, haystack, haystack_len);

    // hn::Find reports "not found" as a position >= haystack_len; clamp it so
    // the sentinel is exactly haystack_len. (An earlier revision apparently
    // returned -1 here; the current contract is to return haystack_len.)
    return (pos < haystack_len) ? pos : haystack_len;
}
|
||||
|
||||
// --- Implementation Details ---
|
||||
|
||||
// Returns the index of the first byte in `text` that matches ANY byte in
// `chars`, or `text_len` if none matches.
//
// Preconditions (enforced below): chars_len >= 2 (single-char lookups must
// go through IndexOfCharImpl) and chars_len <= 16.
size_t IndexOfAnyCharImpl(const uint8_t* HWY_RESTRICT text, size_t text_len, const uint8_t* HWY_RESTRICT chars, size_t chars_len)
{
    if (text_len == 0) return 0;
    D8 d;
    const size_t N = hn::Lanes(d);

    if (chars_len == 1) {
        // Callers are expected to use IndexOfCharImpl for a single needle.
        ASSERT_NOT_REACHED_WITH_MESSAGE("chars_len == 1");
    } else if (chars_len == 2) {
        // 2 character implementation
        // covers the most common case:
        //
        // - { '\r', '\n' }
        // - { '\\', '/' }
        // - { ' ', '\t' }
        //
        const auto vec_char1 = hn::Set(d, chars[0]);
        const auto vec_char2 = hn::Set(d, chars[1]);

        size_t i = 0;
        // Only whole vectors in the SIMD loop; the tail is handled scalar-ly.
        const size_t simd_text_len = text_len - (text_len % N);
        for (; i < simd_text_len; i += N) {
            const auto text_vec = hn::LoadN(d, text + i, N);
            const auto found_mask = hn::Or(hn::Eq(text_vec, vec_char2), hn::Eq(text_vec, vec_char1));

            // FindFirstTrue returns -1 when no lane matched.
            const intptr_t pos = hn::FindFirstTrue(d, found_mask);
            if (pos >= 0) {
                return i + pos;
            }
        }

        // Scalar tail (fewer than N bytes remain).
        for (; i < text_len; ++i) {
            const uint8_t text_char = text[i];
            if (text_char == chars[0] || text_char == chars[1]) {
                return i;
            }
        }

        return text_len;
    } else {
        ASSERT(chars_len <= 16);
        // Broadcast each needle byte into its own vector once, up front, so
        // the inner loop is pure compares/ORs.
        constexpr size_t kMaxPreloadedChars = 16;
        hn::Vec<D8> char_vecs[kMaxPreloadedChars];
        const size_t num_chars_to_preload = std::min(chars_len, kMaxPreloadedChars);
        for (size_t c = 0; c < num_chars_to_preload; ++c) {
            char_vecs[c] = hn::Set(d, chars[c]);
        }

        const size_t simd_text_len = text_len - (text_len % N);
        size_t i = 0;

        for (; i < simd_text_len; i += N) {
            const auto text_vec = hn::LoadN(d, text + i, N);
            auto found_mask = hn::MaskFalse(d);

            // OR together a match mask for every needle byte.
            for (size_t c = 0; c < num_chars_to_preload; ++c) {
                found_mask = hn::Or(found_mask, hn::Eq(text_vec, char_vecs[c]));
            }
            // Defensive: unreachable while ASSERT(chars_len <= 16) holds,
            // since num_chars_to_preload == min(chars_len, 16).
            if (chars_len > num_chars_to_preload) {
                for (size_t c = num_chars_to_preload; c < chars_len; ++c) {
                    found_mask = hn::Or(found_mask, hn::Eq(text_vec, hn::Set(d, chars[c])));
                }
            }

            const intptr_t pos = hn::FindFirstTrue(d, found_mask);
            if (pos >= 0) {
                return i + pos;
            }
        }

        // Scalar tail.
        for (; i < text_len; ++i) {
            const uint8_t text_char = text[i];
            for (size_t c = 0; c < chars_len; ++c) {
                if (text_char == chars[c]) {
                    return i;
                }
            }
        }
    }

    return text_len;
}
|
||||
|
||||
// Narrowing copy: writes the low byte of each u16 in `input` to `output`,
// preserving element order. High bytes are discarded (truncation, not
// saturation). `input` and `output` must not overlap (HWY_RESTRICT).
void CopyU16ToU8Impl(const uint16_t* HWY_RESTRICT input, size_t count,
    uint8_t* HWY_RESTRICT output)
{
    // Tag for the output vector type (u8)
    const hn::ScalableTag<uint8_t> d8;
    // Tag for the input vector type (u16). OrderedTruncate2To takes two u16 vectors
    // (each N/2 lanes) to produce one u8 vector (N lanes).
    // Repartition<uint16_t, decltype(d8)> gives a u16 tag with N/2 lanes.
    const hn::Repartition<uint16_t, decltype(d8)> d16;

    const size_t N8 = hn::Lanes(d8); // Number of u8 lanes processed per iteration
    const size_t N16 = hn::Lanes(d16); // Number of u16 lanes per input vector load

    // Sanity check: we should load 2*N16 u16 elements to produce N8 u8 elements.
    // Since sizeof(u16) == 2 * sizeof(u8), N16 should be N8 / 2.
    // static_assert(N16 * 2 == N8, "Lane configuration mismatch"); // Highway ensures this

    size_t i = 0;
    const size_t simd_count = count - (count % N8);
    // Process N8 elements (u8 output size) per iteration. This corresponds to
    // loading N8 u16 input elements (2 vectors of N16 lanes each).
    for (; i < simd_count; i += N8) {
        // Load two input vectors of u16 (LoadU tolerates unaligned pointers).
        const auto in1 = hn::LoadU(d16, input + i);
        const auto in2 = hn::LoadU(d16, input + i + N16);

        // Truncate and interleave into a single u8 vector
        // OrderedTruncate2To(d_narrow, vec_wide_a, vec_wide_b)
        const hn::Vec<decltype(d8)> result8 = hn::OrderedTruncate2To(d8, in1, in2);

        // Store the resulting u8 vector
        hn::StoreU(result8, d8, output + i);
    }

    // Handle remaining elements (< N8)
    for (; i < count; ++i) {
        output[i] = static_cast<uint8_t>(input[i]); // Truncation happens here
    }
}
|
||||
|
||||
// Implementation for scanCharFrequency (Unchanged from previous correct version)
|
||||
// Adds `delta` to a 64-bucket frequency table for every identifier-like byte
// in `text`. Bucket layout (established by the scalar tail below):
//   [0..25]  'a'..'z'
//   [26..51] 'A'..'Z'
//   [52..61] '0'..'9'
//   [62]     '_'
//   [63]     '$'
// All other bytes are ignored. `freqs` must have at least 64 entries.
// `delta` may be negative (callers can subtract counts); delta == 0 is a no-op.
void ScanCharFrequencyImpl(const uint8_t* HWY_RESTRICT text, size_t text_len, int32_t* HWY_RESTRICT freqs, int32_t delta)
{
    if (text_len == 0 || delta == 0) return;
    D8 d;
    const size_t N = hn::Lanes(d);

    // Range endpoints for the character classes.
    const auto vec_a = hn::Set(d, 'a');
    const auto vec_z = hn::Set(d, 'z');
    const auto vec_A = hn::Set(d, 'A');
    const auto vec_Z = hn::Set(d, 'Z');
    const auto vec_0 = hn::Set(d, '0');
    const auto vec_9 = hn::Set(d, '9');
    const auto vec_underscore = hn::Set(d, '_');
    const auto vec_dollar = hn::Set(d, '$');

    // Bases subtracted to map each class onto its bucket range.
    const auto vec_offset_a = hn::Set(d, 'a');
    const auto vec_offset_A = hn::Set(d, 'A');
    const auto vec_offset_0 = hn::Set(d, '0');

    size_t i = 0;
    size_t simd_text_len = text_len - (text_len % N);
    for (; i < simd_text_len; i += N) {
        const auto text_vec = hn::LoadU(d, text + i);
        // Per-lane class membership masks.
        const auto mask_az = hn::And(hn::Ge(text_vec, vec_a), hn::Le(text_vec, vec_z));
        const auto mask_AZ = hn::And(hn::Ge(text_vec, vec_A), hn::Le(text_vec, vec_Z));
        const auto mask_09 = hn::And(hn::Ge(text_vec, vec_0), hn::Le(text_vec, vec_9));
        const auto mask_underscore = hn::Eq(text_vec, vec_underscore);
        const auto mask_dollar = hn::Eq(text_vec, vec_dollar);
        auto valid_mask = hn::Or(mask_az, hn::Or(mask_AZ, hn::Or(mask_09, hn::Or(mask_underscore, mask_dollar))));
        // Fast-skip chunks with no identifier characters at all.
        if (hn::AllFalse(d, valid_mask)) continue;

        // Compute the bucket index for each lane (only meaningful where
        // valid_mask is set; other lanes keep 0 and are filtered below).
        const auto idx_az = hn::Sub(text_vec, vec_offset_a);
        const auto idx_AZ = hn::Add(hn::Sub(text_vec, vec_offset_A), hn::Set(d, uint8_t { 26 }));
        const auto idx_09 = hn::Add(hn::Sub(text_vec, vec_offset_0), hn::Set(d, uint8_t { 52 }));

        auto indices_vec = hn::Zero(d);
        indices_vec = hn::IfThenElse(mask_az, idx_az, indices_vec);
        indices_vec = hn::IfThenElse(mask_AZ, idx_AZ, indices_vec);
        indices_vec = hn::IfThenElse(mask_09, idx_09, indices_vec);
        indices_vec = hn::IfThenElse(mask_underscore, hn::Set(d, uint8_t { 62 }), indices_vec);
        indices_vec = hn::IfThenElse(mask_dollar, hn::Set(d, uint8_t { 63 }), indices_vec);

        // Spill indices and the validity bitmask to arrays, then accumulate
        // scalar-ly — the histogram update itself is not vectorizable here
        // (lanes may collide on the same bucket).
        alignas(HWY_ALIGNMENT) uint8_t indices_array[HWY_MAX_LANES_D(D8)];
        alignas(HWY_ALIGNMENT) uint8_t valid_bits_array[(HWY_MAX_LANES_D(D8) + 7) / 8];
        hn::Store(indices_vec, d, indices_array);
        hn::StoreMaskBits(d, valid_mask, valid_bits_array);

        for (size_t j = 0; j < N; ++j) {
            if ((valid_bits_array[j / 8] >> (j % 8)) & 1) {
                assert(indices_array[j] < 64);
                freqs[indices_array[j]] += delta;
            }
        }
    }

    // Scalar tail — also documents the canonical bucket mapping.
    for (; i < text_len; ++i) {
        const uint8_t c = text[i];
        if (c >= 'a' && c <= 'z')
            freqs[c - 'a'] += delta;
        else if (c >= 'A' && c <= 'Z')
            freqs[c - 'A' + 26] += delta;
        else if (c >= '0' && c <= '9')
            freqs[c - '0' + 52] += delta;
        else if (c == '_')
            freqs[62] += delta;
        else if (c == '$')
            freqs[63] += delta;
    }
}
|
||||
|
||||
// Implementation for finding interesting characters in string literals
|
||||
// Returns the index of the first byte in `text` that a string-literal scanner
// must stop at: the closing `quote`, a backslash, or any byte outside the
// printable-ASCII range [0x20, 0x7E]. Returns `text_len` when no such byte
// exists. Requires text_len > 0.
size_t IndexOfInterestingCharacterInStringLiteralImpl(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote)
{
    ASSERT(text_len > 0);
    D8 d;
    const size_t N = hn::Lanes(d);

    const auto vec_quote = hn::Set(d, quote);
    const auto vec_backslash = hn::Set(d, '\\');
    const auto vec_min_ascii = hn::Set(d, uint8_t { 0x20 }); // Space
    const auto vec_max_ascii = hn::Set(d, uint8_t { 0x7E }); // ~

    const size_t simd_text_len = text_len - (text_len % N);
    size_t i = 0;
    for (; i < simd_text_len; i += N) {
        const auto text_vec = hn::LoadN(d, text + i, N);

        // Check for quote, backslash, or characters outside printable ASCII range
        const auto mask_quote = hn::Eq(text_vec, vec_quote);
        const auto mask_backslash = hn::Eq(text_vec, vec_backslash);
        const auto mask_lt_min = hn::Lt(text_vec, vec_min_ascii);
        const auto mask_gt_max = hn::Gt(text_vec, vec_max_ascii);

        const auto found_mask = hn::Or(
            hn::Or(mask_quote, mask_backslash),
            hn::Or(mask_lt_min, mask_gt_max));

        const intptr_t pos = hn::FindFirstTrue(d, found_mask);
        if (pos >= 0) {
            return i + pos;
        }
    }

    // Scalar tail — same predicate as the SIMD path.
    for (; i < text_len; ++i) {
        const uint8_t c = text[i];
        if (c == quote || c == '\\' || (c < 0x20 || c > 0x7E)) {
            return i;
        }
    }

    return text_len;
}
|
||||
|
||||
size_t IndexOfNewlineOrNonASCIIOrHashOrAtImpl(const uint8_t* HWY_RESTRICT start_ptr, size_t search_len)
|
||||
{
|
||||
ASSERT(search_len > 0);
|
||||
|
||||
D8 d;
|
||||
const size_t N = hn::Lanes(d);
|
||||
|
||||
const auto vec_hash = hn::Set(d, '#');
|
||||
const auto vec_at = hn::Set(d, '@');
|
||||
const auto vec_min_ascii = hn::Set(d, uint8_t { 0x20 });
|
||||
const auto vec_max_ascii = hn::Set(d, uint8_t { 0x7E });
|
||||
|
||||
size_t i = 0;
|
||||
const size_t simd_text_len = search_len - (search_len % N);
|
||||
for (; i < simd_text_len; i += N) {
|
||||
const auto vec = hn::LoadU(d, start_ptr + i);
|
||||
|
||||
const auto mask_hash = hn::Eq(vec, vec_hash);
|
||||
const auto mask_at = hn::Eq(vec, vec_at);
|
||||
const auto mask_lt_min = hn::Lt(vec, vec_min_ascii);
|
||||
const auto mask_gt_max = hn::Gt(vec, vec_max_ascii);
|
||||
|
||||
const auto found_mask = hn::Or(hn::Or(mask_hash, mask_at), hn::Or(mask_lt_min, mask_gt_max));
|
||||
|
||||
const intptr_t pos = hn::FindFirstTrue(d, found_mask);
|
||||
if (pos >= 0) {
|
||||
return i + pos;
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < search_len; ++i) {
|
||||
const uint8_t char_ = start_ptr[i];
|
||||
if (char_ == '#' || char_ == '@' || char_ < 0x20 || char_ > 127) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return search_len;
|
||||
}
|
||||
|
||||
size_t IndexOfNewlineOrNonASCIIImpl(const uint8_t* HWY_RESTRICT start_ptr, size_t search_len)
|
||||
{
|
||||
ASSERT(search_len > 0);
|
||||
|
||||
D8 d;
|
||||
const size_t N = hn::Lanes(d);
|
||||
|
||||
// SIMD constants
|
||||
const auto vec_max_ascii = hn::Set(d, uint8_t { 127 });
|
||||
const auto vec_min_ascii = hn::Set(d, uint8_t { 0x20 });
|
||||
|
||||
// FUTURE TODO: normalize tabs
|
||||
// Some tests involving githubactions depend on tabs not being normalized right now.
|
||||
|
||||
size_t i = 0;
|
||||
const size_t simd_text_len = search_len - (search_len % N);
|
||||
// Process full vectors
|
||||
for (; i < simd_text_len; i += N) {
|
||||
const auto vec = hn::LoadU(d, start_ptr + i);
|
||||
const auto mask_lt_min = hn::Lt(vec, vec_min_ascii);
|
||||
const auto mask_gt_max = hn::Gt(vec, vec_max_ascii);
|
||||
|
||||
const auto found_mask = hn::Or(mask_gt_max, mask_lt_min);
|
||||
|
||||
const intptr_t pos = hn::FindFirstTrue(d, found_mask);
|
||||
if (pos >= 0) {
|
||||
return i + pos;
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar check for the remainder
|
||||
for (; i < search_len; ++i) {
|
||||
const uint8_t char_ = start_ptr[i];
|
||||
if (char_ > 127 || char_ < 0x20) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return search_len;
|
||||
}
|
||||
|
||||
// Returns the index of the first byte that is a space, any control character
// (<= ' ', which includes '\n', '\r', '\t'), or non-ASCII (> 127). Returns
// `search_len` when no such byte exists. Requires search_len > 0.
size_t IndexOfSpaceOrNewlineOrNonASCIIImpl(const uint8_t* HWY_RESTRICT start_ptr, size_t search_len)
{
    ASSERT(search_len > 0);

    D8 d;
    const size_t N = hn::Lanes(d);

    // Lt(vec, ' ' + 1) is equivalent to the scalar `char_ <= ' '` below —
    // it folds space and all control characters into one comparison.
    const uint8_t after_space = ' ' + 1;

    const auto vec_min_ascii_including_space = hn::Set(d, after_space);
    const auto vec_max_ascii = hn::Set(d, uint8_t { 127 });
    size_t simd_text_len = search_len - (search_len % N);

    size_t i = 0;
    for (; i < simd_text_len; i += N) {
        const auto vec = hn::LoadU(d, start_ptr + i);
        const auto mask_lt_min = hn::Lt(vec, vec_min_ascii_including_space);
        const auto mask_gt_max = hn::Gt(vec, vec_max_ascii);
        const auto found_mask = hn::Or(mask_gt_max, mask_lt_min);
        const intptr_t pos = hn::FindFirstTrue(d, found_mask);
        if (pos >= 0) {
            return i + pos;
        }
    }

    // Scalar tail — same predicate as the SIMD path.
    for (; i < search_len; ++i) {
        const uint8_t char_ = start_ptr[i];
        if (char_ <= ' ' || char_ > 127) {
            return i;
        }
    }

    return search_len;
}
|
||||
|
||||
// Returns true if `text` contains any control character (< 0x20), any
// non-ASCII byte (> 127), or a double quote ('"'). Unlike the IndexOf*
// variants this only answers yes/no, so it can bail out without computing
// the exact lane position. Requires text_len > 0.
bool ContainsNewlineOrNonASCIIOrQuoteImpl(const uint8_t* HWY_RESTRICT text, size_t text_len)
{
    ASSERT(text_len > 0);

    D8 d;
    const size_t N = hn::Lanes(d);

    // SIMD constants
    const auto vec_max_ascii = hn::Set(d, uint8_t { 127 });
    const auto vec_min_ascii = hn::Set(d, uint8_t { 0x20 });
    const auto vec_quote = hn::Set(d, uint8_t { '"' });

    size_t i = 0;
    const size_t simd_text_len = text_len - (text_len % N);

    // Process full vectors
    for (; i < simd_text_len; i += N) {
        const auto vec = hn::LoadU(d, text + i);
        const auto mask_lt_min = hn::Lt(vec, vec_min_ascii);
        const auto mask_gt_max = hn::Gt(vec, vec_max_ascii);

        const auto mask_quote_eq = hn::Eq(vec, vec_quote);

        const auto found_mask = hn::Or(hn::Or(mask_gt_max, mask_lt_min), mask_quote_eq);

        // AllFalse is cheaper than FindFirstTrue; the position is not needed.
        if (!hn::AllFalse(d, found_mask)) {
            return true;
        }
    }

    // Scalar check for the remainder
    for (; i < text_len; ++i) {
        const uint8_t char_ = text[i];
        if (char_ > 127 || char_ < 0x20 || char_ == '"') {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
// Returns the index of the first byte that needs escaping inside a
// JavaScript string literal delimited by `quote_char`: the quote itself,
// a backslash, control characters, non-ASCII bytes, and — when
// is_backtick is true — '$' (start of template interpolation).
// Returns `text_len` when nothing needs escaping. Requires text_len > 0.
//
// NOTE(review): the SIMD loop and the scalar tail below use slightly
// different predicates: the SIMD path flags bytes > 127 and all bytes
// < 0x20, while the scalar tail flags bytes >= 127 and exempts tab
// (0x09). So 0x7F and 0x09 are classified differently depending on
// whether they fall in a full vector or the tail — confirm which
// predicate is intended and unify.
template<bool is_backtick>
static size_t IndexOfNeedsEscapeForJavaScriptStringImpl(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote_char)
{
    ASSERT(text_len > 0);

    D8 d;
    const size_t N = hn::Lanes(d);

    // Set up SIMD constants
    const auto vec_backslash = hn::Set(d, uint8_t { '\\' });
    const auto vec_min_ascii = hn::Set(d, uint8_t { 0x20 });
    const auto vec_max_ascii = hn::Set(d, uint8_t { 127 });
    const auto vec_quote = hn::Set(d, quote_char);

    const auto vec_dollar = hn::Set(d, uint8_t { '$' });
    // The non-backtick instantiation must never be called with a backtick.
    ASSERT(is_backtick || quote_char != '`');

    // Calculate how many full SIMD vectors we can process
    const size_t simd_text_len = text_len - (text_len % N);
    size_t i = 0;

    // Process chunks of the string
    for (; i < simd_text_len; i += N) {
        const auto text_vec = hn::LoadN(d, text + i, N);

        // Check for characters that need escaping
        const auto mask_gt_max = hn::Gt(text_vec, vec_max_ascii);
        const auto mask_lt_min = hn::Lt(text_vec, vec_min_ascii);
        const auto mask_backslash = hn::Eq(text_vec, vec_backslash);
        const auto mask_quote = hn::Eq(text_vec, vec_quote);

        // is_backtick is a compile-time constant, so the unused branch is
        // eliminated per instantiation.
        auto found_mask = !is_backtick ? hn::Or(
                              hn::Or(mask_gt_max, mask_lt_min),
                              hn::Or(mask_backslash, mask_quote))
                                       : hn::Or(
                                             hn::Or(
                                                 hn::Or(mask_gt_max, mask_lt_min),
                                                 hn::Or(mask_backslash, mask_quote)),
                                             hn::Eq(text_vec, vec_dollar));

        const intptr_t pos = hn::FindFirstTrue(d, found_mask);
        if (pos >= 0) {
            return i + pos;
        }
    }

    // Scalar check for the remainder (see NOTE above: predicate differs
    // from the SIMD path for 0x7F and tab).
    for (; i < text_len; ++i) {
        const uint8_t char_ = text[i];
        if (char_ >= 127 || (char_ < 0x20 && char_ != 0x09) || char_ == '\\' || char_ == quote_char || (is_backtick && char_ == '$')) {
            return i;
        }
    }

    return text_len; // No characters needing escape found
}
|
||||
|
||||
// Non-template entry point for template literals (backticks): the <true>
// instantiation additionally flags '$' so callers can escape interpolation.
// Needed because HWY_EXPORT cannot export a function template directly.
size_t IndexOfNeedsEscapeForJavaScriptStringImplBacktick(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote_char)
{
    return IndexOfNeedsEscapeForJavaScriptStringImpl<true>(text, text_len, quote_char);
}
|
||||
|
||||
// Non-template entry point for ordinary single/double-quoted strings:
// the <false> instantiation does not treat '$' specially.
size_t IndexOfNeedsEscapeForJavaScriptStringImplQuote(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote_char)
{
    return IndexOfNeedsEscapeForJavaScriptStringImpl<false>(text, text_len, quote_char);
}
|
||||
|
||||
// Highway implementation of memmem
|
||||
// Returns a pointer to the first occurrence of `needle` in `haystack`,
|
||||
// or nullptr if not found. The return type is non-const `uint8_t*`
|
||||
// to match the standard C `memmem` signature, even though the input
|
||||
// is const. The caller should handle constness appropriately.
|
||||
// Strategy: vector-scan for the needle's first byte, then confirm each
// candidate with memcmp; a scalar pass re-covers the final region.
uint8_t* MemMemImpl(const uint8_t* haystack, size_t haystack_len,
    const uint8_t* needle, size_t needle_len)
{
    // --- Edge Cases ---
    // Empty needle matches at the start (matches glibc memmem semantics).
    if (HWY_UNLIKELY(needle_len == 0)) {
        return const_cast<uint8_t*>(haystack);
    }
    if (HWY_UNLIKELY(haystack_len < needle_len)) {
        return nullptr;
    }
    // Single-byte needle: delegate to the cheaper single-char search.
    if (HWY_UNLIKELY(needle_len == 1)) {
        size_t index = IndexOfCharImpl(haystack, haystack_len, needle[0]);
        if (index != haystack_len) {
            return const_cast<uint8_t*>(haystack + index);
        }
        return nullptr;
    }

    // --- SIMD Setup ---
    const hn::ScalableTag<uint8_t> d;
    const size_t N = hn::Lanes(d);
    const uint8_t first_needle_char = needle[0];
    const hn::Vec<decltype(d)> v_first_needle = hn::Set(d, first_needle_char);
    const size_t last_possible_start = haystack_len - needle_len;

    // --- SIMD Main Loop ---
    size_t i = 0;
    while (i + N <= haystack_len && i <= last_possible_start) {
        const hn::Vec<decltype(d)> haystack_vec = hn::LoadU(d, haystack + i);
        hn::Mask<decltype(d)> m_starts = hn::Eq(haystack_vec, v_first_needle);

        // Iterate through potential matches within this vector chunk using FindFirstTrue
        while (!hn::AllFalse(d, m_starts)) {
            const intptr_t bit_idx_ptr = hn::FindFirstTrue(d, m_starts);
            // Loop condition guarantees FindFirstTrue finds something
            HWY_ASSERT(bit_idx_ptr >= 0);
            const size_t bit_idx = static_cast<size_t>(bit_idx_ptr);

            const size_t potential_pos = i + bit_idx;

            // Double-check bounds (essential if N > needle_len, and correct otherwise)
            if (potential_pos <= last_possible_start) {
                // memcmp may read past the current vector — safe because
                // potential_pos + needle_len <= haystack_len here.
                if (memcmp(haystack + potential_pos, needle, needle_len) == 0) {
                    return const_cast<uint8_t*>(haystack + potential_pos);
                }
            } else {
                // Optimization: If the first match found in this chunk is already
                // beyond the last possible start, no subsequent match in this
                // chunk can be valid.
                goto remainder_check; // Exit both loops and proceed to scalar remainder
            }

            // Clear the found bit to find the next one in the next iteration.
            // SetOnlyFirst creates a mask with only the first true bit set.
            // AndNot removes that bit from m_starts.
            const hn::Mask<decltype(d)> first_bit_mask = hn::SetOnlyFirst(m_starts);
            m_starts = hn::AndNot(first_bit_mask, m_starts);
        } // End while (!AllFalse)

        i += N;
    } // End SIMD loop

remainder_check:
    // --- Scalar Remainder Loop ---
    // Check any remaining bytes that couldn't form a full vector load
    // or potential starts within the last vector load that weren't checked
    // because they were past last_possible_start.
    // Start `i` from where the SIMD loop *could* have last started a valid check.
    // (Re-checking up to N positions the SIMD loop already rejected is
    // redundant but harmless — memcmp fails again.)
    size_t remainder_start = (i >= N) ? (i - N) : 0;
    // Ensure we re-check any potential starts the SIMD loop might have skipped
    // due to the bounds check optimization or being in the final partial vector.
    for (; remainder_start <= last_possible_start; ++remainder_start) {
        // Optimization: Check first character before expensive memcmp
        if (haystack[remainder_start] == first_needle_char) {
            if (memcmp(haystack + remainder_start, needle, needle_len) == 0) {
                return const_cast<uint8_t*>(haystack + remainder_start);
            }
        }
    }

    return nullptr; // Not found
}
|
||||
|
||||
// Implementation for WebSocket mask application
|
||||
// Applies the WebSocket client masking transform: output[i] = input[i] ^
// mask[i % 4]. When skip_mask is true the data is copied through unchanged.
// `mask` must point to exactly 4 bytes (ASSERT below); input/output must
// not overlap (HWY_RESTRICT).
//
// NOTE(review): the vectorized path assumes the lane count N is a multiple
// of 4 so that a fixed repeating-mask vector stays phase-aligned as `i`
// advances by N. That holds for the usual 16/32/64-byte targets, but would
// break on a target with N < 4 (e.g. HWY_SCALAR) — confirm such targets
// are excluded from the build.
void FillWithSkipMaskImpl(const uint8_t* HWY_RESTRICT mask, size_t mask_len, uint8_t* HWY_RESTRICT output, const uint8_t* HWY_RESTRICT input, size_t length, bool skip_mask)
{
    ASSERT(mask_len == 4);

    ASSERT(length > 0);

    // If we're skipping masking or there's no data, return early
    if (skip_mask) {
        std::memcpy(output, input, length);
        return;
    }

    D8 d;
    const size_t N = hn::Lanes(d);

    // Create a vector filled with the mask pattern repeating every 4 bytes
    alignas(HWY_ALIGNMENT) uint8_t mask_pattern[HWY_MAX_LANES_D(D8)] = {};
    for (size_t i = 0; i < HWY_MAX_LANES_D(D8); i += 4) {
        mask_pattern[i] = mask[0];
        mask_pattern[i + 1] = mask[1];
        mask_pattern[i + 2] = mask[2];
        mask_pattern[i + 3] = mask[3];
    }
    const auto mask_vec = hn::Load(d, mask_pattern);

    // Process data in chunks of size N
    size_t i = 0;
    const size_t vector_length = length - (length % N);
    for (; i < vector_length; i += N) {
        // Load input data
        const auto input_vec = hn::LoadU(d, input + i);
        // XOR with mask
        const auto masked_vec = hn::Xor(input_vec, mask_vec);
        // Store result
        hn::StoreU(masked_vec, d, output + i);
    }

    // Handle remaining bytes with scalar operations
    for (; i < length; ++i) {
        output[i] = input[i] ^ mask[i % 4];
    }
}
|
||||
|
||||
} // namespace HWY_NAMESPACE
|
||||
} // namespace bun
|
||||
HWY_AFTER_NAMESPACE();
|
||||
|
||||
// HWY_ONCE ensures this block is only included once,
|
||||
// in the final pass after all target-specific code is generated.
|
||||
#if HWY_ONCE
|
||||
|
||||
namespace bun {

// Define the dispatch tables. The names here must exactly match
// the *Impl function names defined within the HWY_NAMESPACE block above.
// HWY_EXPORT builds a per-target function table; HWY_DYNAMIC_DISPATCH
// (used by the extern "C" wrappers below) selects the best entry for the
// CPU at runtime.
HWY_EXPORT(ContainsNewlineOrNonASCIIOrQuoteImpl);
HWY_EXPORT(CopyU16ToU8Impl);
HWY_EXPORT(FillWithSkipMaskImpl);
HWY_EXPORT(IndexOfAnyCharImpl);
HWY_EXPORT(IndexOfCharImpl);
HWY_EXPORT(IndexOfInterestingCharacterInStringLiteralImpl);
HWY_EXPORT(IndexOfNeedsEscapeForJavaScriptStringImplBacktick);
HWY_EXPORT(IndexOfNeedsEscapeForJavaScriptStringImplQuote);
HWY_EXPORT(IndexOfNewlineOrNonASCIIImpl);
HWY_EXPORT(IndexOfNewlineOrNonASCIIOrHashOrAtImpl);
HWY_EXPORT(IndexOfSpaceOrNewlineOrNonASCIIImpl);
HWY_EXPORT(MemMemImpl);
HWY_EXPORT(ScanCharFrequencyImpl);
} // namespace bun
|
||||
|
||||
// Define the C-callable wrappers that use HWY_DYNAMIC_DISPATCH.
|
||||
// These need to be defined *after* the HWY_EXPORT block.
|
||||
// C ABI surface consumed from Zig (src/string_immutable.zig et al.).
// Each wrapper dispatches to the best per-CPU implementation via
// HWY_DYNAMIC_DISPATCH.
extern "C" {

// memmem replacement; returns a pointer into `haystack` or nullptr.
void* highway_memmem(const uint8_t* haystack, size_t haystack_len, const uint8_t* needle, size_t needle_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::MemMemImpl)(haystack, haystack_len, needle, needle_len);
}

// Internal dispatch helper for highway_copy_u16_to_u8 below.
static void highway_copy_u16_to_u8_impl(
    const uint16_t* input,
    size_t count,
    uint8_t* output)
{
    return HWY_DYNAMIC_DISPATCH(bun::CopyU16ToU8Impl)(input, count, output);
}

// Narrowing copy of `count` u16 elements to u8 (low byte kept).
void highway_copy_u16_to_u8(
    // No HWY_RESTRICT
    const uint16_t* input,

    size_t count,
    // No HWY_RESTRICT
    uint8_t* output)
{

    if (count == 0) {
        return;
    }

    // Check alignment of the input pointer
    if (!hwy::IsAligned(input, alignof(uint16_t))) {
        // Handle the first unaligned element scalar-ly
        output[0] = static_cast<uint8_t>(input[0]);

        // Call the core implementation with adjusted pointers and count,
        // which are now guaranteed to be aligned or have count == 0.
        // The HWY_RESTRICT inside CopyU16ToU8Impl is now valid for the
        // ranges it operates on.
        // NOTE(review): advancing a misaligned uint16_t* by one element
        // (+2 bytes) does not change its alignment, so the claim above
        // looks wrong. CopyU16ToU8Impl uses LoadU (unaligned loads), so
        // this is believed harmless — confirm and consider removing the
        // branch.
        if (count > 1)
            highway_copy_u16_to_u8_impl(input + 1, count - 1, output + 1);
    } else {
        // Input is already aligned, call the core implementation directly.
        highway_copy_u16_to_u8_impl(input, count, output);
    }
}
// First index of any byte from `chars` in `text`; text_len if none.
size_t highway_index_of_any_char(const uint8_t* HWY_RESTRICT text, size_t text_len, const uint8_t* HWY_RESTRICT chars, size_t chars_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfAnyCharImpl)(text, text_len, chars, chars_len);
}

// Accumulates identifier-character frequencies into the 64-entry `freqs`.
void highway_char_frequency(const uint8_t* HWY_RESTRICT text, size_t text_len,
    int32_t* freqs, int32_t delta)
{
    HWY_DYNAMIC_DISPATCH(bun::ScanCharFrequencyImpl)(text, text_len, freqs, delta);
}

// First index of `needle` in `haystack`; haystack_len if not found.
size_t highway_index_of_char(const uint8_t* HWY_RESTRICT haystack, size_t haystack_len,
    uint8_t needle)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfCharImpl)(haystack, haystack_len, needle);
}

// First quote/backslash/non-printable byte in a string literal body.
size_t highway_index_of_interesting_character_in_string_literal(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfInterestingCharacterInStringLiteralImpl)(text, text_len, quote);
}

// First control (< 0x20) or non-ASCII (> 127) byte.
size_t highway_index_of_newline_or_non_ascii(const uint8_t* HWY_RESTRICT haystack, size_t haystack_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfNewlineOrNonASCIIImpl)(haystack, haystack_len);
}

// Same as above, additionally stopping on '#' and '@'.
size_t highway_index_of_newline_or_non_ascii_or_hash_or_at(const uint8_t* HWY_RESTRICT haystack, size_t haystack_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfNewlineOrNonASCIIOrHashOrAtImpl)(haystack, haystack_len);
}

// True if text contains a control byte, non-ASCII byte, or '"'.
bool highway_contains_newline_or_non_ascii_or_quote(const uint8_t* HWY_RESTRICT text, size_t text_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::ContainsNewlineOrNonASCIIOrQuoteImpl)(text, text_len);
}

// First byte needing escape inside a JS string literal; the backtick
// variant also stops on '$' (template interpolation).
size_t highway_index_of_needs_escape_for_javascript_string(const uint8_t* HWY_RESTRICT text, size_t text_len, uint8_t quote_char)
{
    if (quote_char == '`') {
        return HWY_DYNAMIC_DISPATCH(bun::IndexOfNeedsEscapeForJavaScriptStringImplBacktick)(text, text_len, quote_char);
    } else {
        return HWY_DYNAMIC_DISPATCH(bun::IndexOfNeedsEscapeForJavaScriptStringImplQuote)(text, text_len, quote_char);
    }
}

// First byte <= ' ' or > 127.
size_t highway_index_of_space_or_newline_or_non_ascii(const uint8_t* HWY_RESTRICT text, size_t text_len)
{
    return HWY_DYNAMIC_DISPATCH(bun::IndexOfSpaceOrNewlineOrNonASCIIImpl)(text, text_len);
}

// WebSocket masking: output[i] = input[i] ^ mask[i % 4] (or plain copy
// when skip_mask is set).
void highway_fill_with_skip_mask(
    const uint8_t* mask, // 4-byte mask array
    size_t mask_len, // Should be 4
    uint8_t* output, // Output buffer
    const uint8_t* input, // Input buffer
    size_t length, // Length of input/output
    bool skip_mask) // Whether to skip masking
{
    HWY_DYNAMIC_DISPATCH(bun::FillWithSkipMaskImpl)(mask, mask_len, output, input, length, skip_mask);
}

} // extern "C"
|
||||
|
||||
#if OS(DARWIN)
|
||||
// On macOS, override the libc memmem with our implementation
|
||||
// This uses inline assembly to ensure the symbol is exported with the correct name
|
||||
__asm__(".globl _memmem");
|
||||
__asm__(".set _memmem, _highway_memmem");
|
||||
#elif OS(LINUX)
|
||||
// On Linux, override the libc memmem with our implementation
|
||||
// This uses the GNU-specific attribute to alias our function to the libc symbol
|
||||
// The alias will be visible across the entire program, not just this file
|
||||
extern "C" {
|
||||
// Using both "default" visibility and "weak" ensures our implementation is used
|
||||
// throughout the entire program when linked, not just in this object file
|
||||
__attribute__((visibility("default"), weak, used)) void* memmem(const void* haystack, size_t haystacklen, const void* needle, size_t needlelen)
|
||||
__attribute__((alias("highway_memmem")));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // HWY_ONCE
|
||||
@@ -355,7 +355,7 @@ pub const Encoder = struct {
|
||||
|
||||
switch (comptime encoding) {
|
||||
.utf8 => {
|
||||
return strings.elementLengthLatin1IntoUTF8([]const u8, input[0..len]);
|
||||
return strings.elementLengthLatin1IntoUTF8(input[0..len]);
|
||||
},
|
||||
|
||||
.latin1, .ascii, .buffer => {
|
||||
@@ -395,7 +395,7 @@ pub const Encoder = struct {
|
||||
},
|
||||
.latin1, .ascii, .buffer => {
|
||||
const out = @min(len, to_len);
|
||||
strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..out]);
|
||||
strings.copyU16IntoU8(to[0..to_len], input[0..out]);
|
||||
return out;
|
||||
},
|
||||
// string is already encoded, just need to copy the data
|
||||
@@ -404,7 +404,7 @@ pub const Encoder = struct {
|
||||
const bytes_input_len = len * 2;
|
||||
const written = @min(bytes_input_len, to_len);
|
||||
const input_u8 = @as([*]const u8, @ptrCast(input));
|
||||
strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..written]);
|
||||
bun.memmove(to[0..written], input_u8[0..written]);
|
||||
return written;
|
||||
} else {
|
||||
const bytes_input_len = len * 2;
|
||||
@@ -413,7 +413,7 @@ pub const Encoder = struct {
|
||||
|
||||
const fixed_len = (written / 2) * 2;
|
||||
const input_u8 = @as([*]const u8, @ptrCast(input));
|
||||
strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]);
|
||||
bun.memmove(to[0..written], input_u8[0..fixed_len]);
|
||||
return fixed_len;
|
||||
}
|
||||
},
|
||||
@@ -503,7 +503,7 @@ pub const Encoder = struct {
|
||||
},
|
||||
.latin1, .buffer, .ascii => {
|
||||
var to = allocator.alloc(u8, len) catch return &[_]u8{};
|
||||
strings.copyU16IntoU8(to[0..len], []const u16, input[0..len]);
|
||||
strings.copyU16IntoU8(to[0..len], input[0..len]);
|
||||
return to;
|
||||
},
|
||||
// string is already encoded, just need to copy the data
|
||||
|
||||
@@ -3595,3 +3595,4 @@ pub fn freeSensitive(allocator: std.mem.Allocator, slice: anytype) void {
|
||||
pub const server = @import("./bun.js/api/server.zig");
|
||||
pub const macho = @import("./macho.zig");
|
||||
pub const valkey = @import("./valkey/index.zig");
|
||||
pub const highway = @import("./highway.zig");
|
||||
|
||||
@@ -2986,7 +2986,7 @@ pub fn parse_attribute_selector(comptime Impl: type, parser: *SelectorParser, in
|
||||
};
|
||||
const never_matches = switch (operator) {
|
||||
.equal, .dash_match => false,
|
||||
.includes => value_str.len == 0 or std.mem.indexOfAny(u8, value_str, SELECTOR_WHITESPACE) != null,
|
||||
.includes => value_str.len == 0 or bun.strings.indexOfAny(value_str, SELECTOR_WHITESPACE) != null,
|
||||
.prefix, .substring, .suffix => value_str.len == 0,
|
||||
};
|
||||
|
||||
|
||||
@@ -1422,43 +1422,10 @@ pub fn GlobWalker_(
|
||||
return filepath.len > 0 and filepath[0] == '.';
|
||||
}
|
||||
|
||||
const syntax_tokens = "*[{?!";
|
||||
|
||||
fn checkSpecialSyntax(pattern: []const u8) bool {
|
||||
if (pattern.len < 16) {
|
||||
for (pattern[0..]) |c| {
|
||||
switch (c) {
|
||||
'*', '[', '{', '?', '!' => return true,
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const syntax_tokens = comptime [_]u8{ '*', '[', '{', '?', '!' };
|
||||
const needles: [syntax_tokens.len]@Vector(16, u8) = comptime needles: {
|
||||
var needles: [syntax_tokens.len]@Vector(16, u8) = undefined;
|
||||
for (syntax_tokens, 0..) |tok, i| {
|
||||
needles[i] = @splat(tok);
|
||||
}
|
||||
break :needles needles;
|
||||
};
|
||||
|
||||
var i: usize = 0;
|
||||
while (i + 16 <= pattern.len) : (i += 16) {
|
||||
const haystack: @Vector(16, u8) = pattern[i..][0..16].*;
|
||||
inline for (needles) |needle| {
|
||||
if (std.simd.firstTrue(needle == haystack) != null) return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < pattern.len) {
|
||||
for (pattern[i..]) |c| {
|
||||
inline for (syntax_tokens) |tok| {
|
||||
if (c == tok) return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return bun.strings.indexOfAny(pattern, syntax_tokens) != null;
|
||||
}
|
||||
|
||||
fn makeComponent(
|
||||
|
||||
305
src/highway.zig
Normal file
305
src/highway.zig
Normal file
@@ -0,0 +1,305 @@
|
||||
const std = @import("std");
|
||||
const bun = @import("bun");
|
||||
const strings = bun.strings;
|
||||
const string = bun.string;
|
||||
const Environment = bun.Environment;
|
||||
|
||||
extern "c" fn highway_char_frequency(
|
||||
text: [*]const u8,
|
||||
text_len: usize,
|
||||
freqs: [*]i32,
|
||||
delta: i32,
|
||||
) void;
|
||||
|
||||
extern "c" fn highway_index_of_char(
|
||||
haystack: [*]const u8,
|
||||
haystack_len: usize,
|
||||
needle: u8,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_interesting_character_in_string_literal(
|
||||
noalias text: [*]const u8,
|
||||
text_len: usize,
|
||||
quote: u8,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_newline_or_non_ascii(
|
||||
noalias haystack: [*]const u8,
|
||||
haystack_len: usize,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_newline_or_non_ascii_or_ansi(
|
||||
noalias haystack: [*]const u8,
|
||||
haystack_len: usize,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_newline_or_non_ascii_or_hash_or_at(
|
||||
noalias haystack: [*]const u8,
|
||||
haystack_len: usize,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_space_or_newline_or_non_ascii(
|
||||
noalias haystack: [*]const u8,
|
||||
haystack_len: usize,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_contains_newline_or_non_ascii_or_quote(
|
||||
noalias text: [*]const u8,
|
||||
text_len: usize,
|
||||
) bool;
|
||||
|
||||
extern "c" fn highway_index_of_needs_escape_for_javascript_string(
|
||||
noalias text: [*]const u8,
|
||||
text_len: usize,
|
||||
quote_char: u8,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_index_of_any_char(
|
||||
noalias text: [*]const u8,
|
||||
text_len: usize,
|
||||
noalias chars: [*]const u8,
|
||||
chars_len: usize,
|
||||
) usize;
|
||||
|
||||
extern "c" fn highway_fill_with_skip_mask(
|
||||
mask: [*]const u8,
|
||||
mask_len: usize,
|
||||
output: [*]u8,
|
||||
input: [*]const u8,
|
||||
length: usize,
|
||||
skip_mask: bool,
|
||||
) void;
|
||||
|
||||
/// Count frequencies of [a-zA-Z0-9_$] characters in a string
|
||||
/// Updates the provided frequency array with counts (adds delta for each occurrence)
|
||||
pub fn scanCharFrequency(text: string, freqs: *[64]i32, delta: i32) void {
|
||||
if (text.len == 0 or delta == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
highway_char_frequency(
|
||||
text.ptr,
|
||||
text.len,
|
||||
freqs.ptr,
|
||||
delta,
|
||||
);
|
||||
}
|
||||
|
||||
pub fn indexOfChar(haystack: string, needle: u8) ?usize {
|
||||
if (haystack.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_char(
|
||||
haystack.ptr,
|
||||
haystack.len,
|
||||
needle,
|
||||
);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
bun.debugAssert(haystack[result] == needle);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn indexOfInterestingCharacterInStringLiteral(slice: string, quote_type: u8) ?usize {
|
||||
if (slice.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_interesting_character_in_string_literal(
|
||||
slice.ptr,
|
||||
slice.len,
|
||||
quote_type,
|
||||
);
|
||||
|
||||
if (result == slice.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn indexOfNewlineOrNonASCII(haystack: string) ?usize {
|
||||
bun.debugAssert(haystack.len > 0);
|
||||
|
||||
const result = highway_index_of_newline_or_non_ascii(
|
||||
haystack.ptr,
|
||||
haystack.len,
|
||||
);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
if (comptime Environment.isDebug) {
|
||||
const haystack_char = haystack[result];
|
||||
if (!(haystack_char > 127 or haystack_char < 0x20 or haystack_char == '\r' or haystack_char == '\n')) {
|
||||
@panic("Invalid character found in indexOfNewlineOrNonASCII");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn indexOfNewlineOrNonASCIIOrANSI(haystack: string) ?usize {
|
||||
bun.debugAssert(haystack.len > 0);
|
||||
|
||||
const result = highway_index_of_newline_or_non_ascii_or_ansi(
|
||||
haystack.ptr,
|
||||
haystack.len,
|
||||
);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
if (comptime Environment.isDebug) {
|
||||
const haystack_char = haystack[result];
|
||||
if (!(haystack_char > 127 or haystack_char < 0x20 or haystack_char == '\r' or haystack_char == '\n')) {
|
||||
@panic("Invalid character found in indexOfNewlineOrNonASCIIOrANSI");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Checks if the string contains any newlines, non-ASCII characters, or quotes
|
||||
pub fn containsNewlineOrNonASCIIOrQuote(text: string) bool {
|
||||
if (text.len == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return highway_contains_newline_or_non_ascii_or_quote(
|
||||
text.ptr,
|
||||
text.len,
|
||||
);
|
||||
}
|
||||
|
||||
/// Finds the first character that needs escaping in a JavaScript string
|
||||
/// Looks for characters above ASCII (> 127), control characters (< 0x20),
|
||||
/// backslash characters (`\`), the quote character itself, and for backtick
|
||||
/// strings also the dollar sign (`$`)
|
||||
pub fn indexOfNeedsEscapeForJavaScriptString(slice: string, quote_char: u8) ?u32 {
|
||||
if (slice.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_needs_escape_for_javascript_string(
|
||||
slice.ptr,
|
||||
slice.len,
|
||||
quote_char,
|
||||
);
|
||||
|
||||
if (result == slice.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (comptime Environment.isDebug) {
|
||||
const haystack_char = slice[result];
|
||||
if (!(haystack_char > 127 or haystack_char < 0x20 or haystack_char == '\\' or haystack_char == quote_char or haystack_char == '$' or haystack_char == '\r' or haystack_char == '\n')) {
|
||||
@panic("Invalid character found in indexOfNeedsEscapeForJavaScriptString");
|
||||
}
|
||||
}
|
||||
|
||||
return @truncate(result);
|
||||
}
|
||||
|
||||
pub fn indexOfAnyChar(haystack: string, chars: string) ?usize {
|
||||
if (haystack.len == 0 or chars.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_any_char(haystack.ptr, haystack.len, chars.ptr, chars.len);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (comptime Environment.isDebug) {
|
||||
const haystack_char = haystack[result];
|
||||
var found = false;
|
||||
for (chars) |c| {
|
||||
if (c == haystack_char) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
@panic("Invalid character found in indexOfAnyChar");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
extern "c" fn highway_copy_u16_to_u8(
|
||||
input: [*]align(1) const u16,
|
||||
count: usize,
|
||||
output: [*]u8,
|
||||
) void;
|
||||
|
||||
pub fn copyU16ToU8(input: []align(1) const u16, output: []u8) void {
|
||||
highway_copy_u16_to_u8(input.ptr, input.len, output.ptr);
|
||||
}
|
||||
|
||||
/// Apply a WebSocket mask to data using SIMD acceleration
|
||||
/// If skip_mask is true, data is copied without masking
|
||||
pub fn fillWithSkipMask(mask: [4]u8, output: []u8, input: []const u8, skip_mask: bool) void {
|
||||
if (input.len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
highway_fill_with_skip_mask(
|
||||
&mask,
|
||||
4,
|
||||
output.ptr,
|
||||
input.ptr,
|
||||
input.len,
|
||||
skip_mask,
|
||||
);
|
||||
}
|
||||
|
||||
/// Useful for single-line JavaScript comments.
|
||||
/// Scans for:
|
||||
/// - `\n`, `\r`
|
||||
/// - Non-ASCII characters (which implicitly include `\n`, `\r`)
|
||||
/// - `#`
|
||||
/// - `@`
|
||||
pub fn indexOfNewlineOrNonASCIIOrHashOrAt(haystack: string) ?usize {
|
||||
if (haystack.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_newline_or_non_ascii_or_hash_or_at(
|
||||
haystack.ptr,
|
||||
haystack.len,
|
||||
);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Scans for:
|
||||
/// - " "
|
||||
/// - Non-ASCII characters (which implicitly include `\n`, `\r`, '\t')
|
||||
pub fn indexOfSpaceOrNewlineOrNonASCII(haystack: string) ?usize {
|
||||
if (haystack.len == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = highway_index_of_space_or_newline_or_non_ascii(
|
||||
haystack.ptr,
|
||||
haystack.len,
|
||||
);
|
||||
|
||||
if (result == haystack.len) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -731,80 +731,17 @@ pub const Mask = struct {
|
||||
const mask = mask_buf.*;
|
||||
|
||||
const skip_mask = @as(u32, @bitCast(mask)) == 0;
|
||||
if (!skip_mask) {
|
||||
fillWithSkipMask(mask, output_, input_, false);
|
||||
} else {
|
||||
fillWithSkipMask(mask, output_, input_, true);
|
||||
}
|
||||
fillWithSkipMask(mask, output_, input_, skip_mask);
|
||||
}
|
||||
|
||||
fn fillWithSkipMask(mask: [4]u8, output_: []u8, input_: []const u8, comptime skip_mask: bool) void {
|
||||
var input = input_;
|
||||
var output = output_;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
if (input.len >= strings.ascii_vector_size) {
|
||||
const vec: strings.AsciiVector = brk: {
|
||||
var in: [strings.ascii_vector_size]u8 = undefined;
|
||||
comptime var i: usize = 0;
|
||||
inline while (i < strings.ascii_vector_size) : (i += 4) {
|
||||
in[i..][0..4].* = mask;
|
||||
}
|
||||
break :brk @as(strings.AsciiVector, in);
|
||||
};
|
||||
const end_ptr_wrapped_to_last_16 = input.ptr + input.len - (input.len % strings.ascii_vector_size);
|
||||
|
||||
if (comptime skip_mask) {
|
||||
while (input.ptr != end_ptr_wrapped_to_last_16) {
|
||||
const input_vec: strings.AsciiVector = @as(strings.AsciiVector, input[0..strings.ascii_vector_size].*);
|
||||
output.ptr[0..strings.ascii_vector_size].* = input_vec;
|
||||
output = output[strings.ascii_vector_size..];
|
||||
input = input[strings.ascii_vector_size..];
|
||||
}
|
||||
} else {
|
||||
while (input.ptr != end_ptr_wrapped_to_last_16) {
|
||||
const input_vec: strings.AsciiVector = @as(strings.AsciiVector, input[0..strings.ascii_vector_size].*);
|
||||
output.ptr[0..strings.ascii_vector_size].* = input_vec ^ vec;
|
||||
output = output[strings.ascii_vector_size..];
|
||||
input = input[strings.ascii_vector_size..];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// hint to the compiler not to vectorize the next loop
|
||||
bun.assert(input.len < strings.ascii_vector_size);
|
||||
}
|
||||
|
||||
if (comptime !skip_mask) {
|
||||
while (input.len >= 4) {
|
||||
const input_vec: [4]u8 = input[0..4].*;
|
||||
output.ptr[0..4].* = [4]u8{
|
||||
input_vec[0] ^ mask[0],
|
||||
input_vec[1] ^ mask[1],
|
||||
input_vec[2] ^ mask[2],
|
||||
input_vec[3] ^ mask[3],
|
||||
};
|
||||
output = output[4..];
|
||||
input = input[4..];
|
||||
}
|
||||
} else {
|
||||
while (input.len >= 4) {
|
||||
const input_vec: [4]u8 = input[0..4].*;
|
||||
output.ptr[0..4].* = input_vec;
|
||||
output = output[4..];
|
||||
input = input[4..];
|
||||
}
|
||||
}
|
||||
|
||||
if (comptime !skip_mask) {
|
||||
for (input, 0..) |c, i| {
|
||||
output[i] = c ^ mask[i % 4];
|
||||
}
|
||||
} else {
|
||||
for (input, 0..) |c, i| {
|
||||
output[i] = c;
|
||||
}
|
||||
fn fillWithSkipMask(mask: [4]u8, output_: []u8, input_: []const u8, skip_mask: bool) void {
|
||||
const input = input_;
|
||||
const output = output_;
|
||||
if (input.len == 0) {
|
||||
@branchHint(.unlikely);
|
||||
return;
|
||||
}
|
||||
return bun.highway.fillWithSkipMask(mask, output, input, skip_mask);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -902,7 +839,7 @@ const Copy = union(enum) {
|
||||
return WebsocketHeader.frameSizeIncludingMask(byte_len.*);
|
||||
},
|
||||
.latin1 => {
|
||||
byte_len.* = strings.elementLengthLatin1IntoUTF8([]const u8, this.latin1);
|
||||
byte_len.* = strings.elementLengthLatin1IntoUTF8(this.latin1);
|
||||
return WebsocketHeader.frameSizeIncludingMask(byte_len.*);
|
||||
},
|
||||
.bytes => {
|
||||
|
||||
310
src/js_lexer.zig
310
src/js_lexer.zig
@@ -806,6 +806,10 @@ fn NewLexer_(
|
||||
return if (!(cp_len + it.current > it.source.contents.len)) it.source.contents[it.current .. cp_len + it.current] else "";
|
||||
}
|
||||
|
||||
fn remaining(it: *const LexerType) []const u8 {
|
||||
return it.source.contents[it.current..];
|
||||
}
|
||||
|
||||
inline fn nextCodepoint(it: *LexerType) CodePoint {
|
||||
if (it.current >= it.source.contents.len) {
|
||||
it.end = it.source.contents.len;
|
||||
@@ -1498,26 +1502,14 @@ fn NewLexer_(
|
||||
lexer.token = .t_slash_equals;
|
||||
},
|
||||
'/' => {
|
||||
singleLineComment: while (true) {
|
||||
lexer.step();
|
||||
switch (lexer.code_point) {
|
||||
'\r', '\n', 0x2028, 0x2029 => {
|
||||
break :singleLineComment;
|
||||
},
|
||||
-1 => {
|
||||
break :singleLineComment;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
lexer.scanSingleLineComment();
|
||||
if (comptime is_json) {
|
||||
if (!json.allow_comments) {
|
||||
try lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
|
||||
return;
|
||||
}
|
||||
}
|
||||
lexer.scanCommentText();
|
||||
lexer.scanCommentText(false);
|
||||
continue;
|
||||
},
|
||||
'*' => {
|
||||
@@ -1571,7 +1563,7 @@ fn NewLexer_(
|
||||
return;
|
||||
}
|
||||
}
|
||||
lexer.scanCommentText();
|
||||
lexer.scanCommentText(true);
|
||||
continue;
|
||||
},
|
||||
else => {
|
||||
@@ -1890,7 +1882,7 @@ fn NewLexer_(
|
||||
}
|
||||
}
|
||||
|
||||
fn scanCommentText(lexer: *LexerType) void {
|
||||
fn scanCommentText(lexer: *LexerType, for_pragma: bool) void {
|
||||
const text = lexer.source.contents[lexer.start..lexer.end];
|
||||
const has_legal_annotation = text.len > 2 and text[2] == '!';
|
||||
const is_multiline_comment = text.len > 1 and text[1] == '*';
|
||||
@@ -1922,120 +1914,132 @@ fn NewLexer_(
|
||||
if (comptime is_json)
|
||||
return;
|
||||
|
||||
var rest = text[0..end_comment_text];
|
||||
const end = rest.ptr + rest.len;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
const wrapped_len = rest.len - (rest.len % strings.ascii_vector_size);
|
||||
const comment_end = rest.ptr + wrapped_len;
|
||||
while (rest.ptr != comment_end) {
|
||||
const vec: strings.AsciiVector = rest.ptr[0..strings.ascii_vector_size].*;
|
||||
|
||||
// lookahead for any # or @ characters
|
||||
const hashtag = @as(strings.AsciiVectorU1, @bitCast(vec == @as(strings.AsciiVector, @splat(@as(u8, '#')))));
|
||||
const at = @as(strings.AsciiVectorU1, @bitCast(vec == @as(strings.AsciiVector, @splat(@as(u8, '@')))));
|
||||
|
||||
if (@reduce(.Max, hashtag + at) == 1) {
|
||||
rest.len = @intFromPtr(end) - @intFromPtr(rest.ptr);
|
||||
if (comptime Environment.allow_assert) {
|
||||
bun.assert(
|
||||
strings.containsChar(&@as([strings.ascii_vector_size]u8, vec), '#') or
|
||||
strings.containsChar(&@as([strings.ascii_vector_size]u8, vec), '@'),
|
||||
);
|
||||
}
|
||||
|
||||
for (@as([strings.ascii_vector_size]u8, vec), 0..) |c, i| {
|
||||
switch (c) {
|
||||
'@', '#' => {
|
||||
const chunk = rest[i + 1 ..];
|
||||
if (!lexer.has_pure_comment_before) {
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "__PURE__")) {
|
||||
lexer.has_pure_comment_before = true;
|
||||
continue;
|
||||
}
|
||||
// TODO: implement NO_SIDE_EFFECTS
|
||||
// else if (strings.hasPrefixWithWordBoundary(chunk, "__NO_SIDE_EFFECTS__")) {
|
||||
// lexer.has_no_side_effect_comment_before = true;
|
||||
// continue;
|
||||
// }
|
||||
}
|
||||
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "jsx")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsx", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsx = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxFrag")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxFrag", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxFrag = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxRuntime")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxRuntime", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxRuntime = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxImportSource")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxImportSource", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxImportSource = span;
|
||||
}
|
||||
} else if (i == 2 and strings.hasPrefixComptime(chunk, " sourceMappingURL=")) {
|
||||
if (PragmaArg.scan(.no_space_first, lexer.start + i + 1, " sourceMappingURL=", chunk)) |span| {
|
||||
lexer.source_mapping_url = span;
|
||||
}
|
||||
}
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rest.ptr += strings.ascii_vector_size;
|
||||
}
|
||||
rest.len = @intFromPtr(end) - @intFromPtr(rest.ptr);
|
||||
if (!for_pragma) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert)
|
||||
bun.assert(rest.len == 0 or bun.isSliceInBuffer(rest, text));
|
||||
var rest = text[0..end_comment_text];
|
||||
|
||||
while (rest.len > 0) {
|
||||
const c = rest[0];
|
||||
rest = rest[1..];
|
||||
while (strings.indexOfAny(rest, "@#")) |i| {
|
||||
const c = rest[i];
|
||||
rest = rest[@min(i + 1, rest.len)..];
|
||||
switch (c) {
|
||||
'@', '#' => {
|
||||
const chunk = rest;
|
||||
const i = @intFromPtr(chunk.ptr) - @intFromPtr(text.ptr);
|
||||
if (!lexer.has_pure_comment_before) {
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "__PURE__")) {
|
||||
lexer.has_pure_comment_before = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
const offset = lexer.scanPragma(lexer.start + i + (text.len - rest.len), chunk, false);
|
||||
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "jsx")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsx", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsx = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxFrag")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxFrag", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxFrag = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxRuntime")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxRuntime", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxRuntime = span;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxImportSource")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + i + 1, "jsxImportSource", chunk)) |span| {
|
||||
lexer.jsx_pragma._jsxImportSource = span;
|
||||
}
|
||||
} else if (i == 2 and strings.hasPrefixComptime(chunk, " sourceMappingURL=")) {
|
||||
if (PragmaArg.scan(.no_space_first, lexer.start + i + 1, " sourceMappingURL=", chunk)) |span| {
|
||||
lexer.source_mapping_url = span;
|
||||
}
|
||||
}
|
||||
rest = rest[
|
||||
// The @min is necessary because the file could end
|
||||
// with a pragma and hasPrefixWithWordBoundary
|
||||
// returns true when that "word boundary" is EOF
|
||||
@min(offset, rest.len)..];
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This scans a "// comment" in a single pass over the input.
|
||||
fn scanSingleLineComment(lexer: *LexerType) void {
|
||||
while (true) {
|
||||
// Find index of newline (ASCII/Unicode), non-ASCII, '#', or '@'.
|
||||
if (bun.highway.indexOfNewlineOrNonASCIIOrHashOrAt(lexer.remaining())) |relative_index| {
|
||||
const absolute_index = lexer.current + relative_index;
|
||||
lexer.current = absolute_index; // Move TO the interesting char
|
||||
|
||||
lexer.step(); // Consume the interesting char, sets code_point, advances current
|
||||
|
||||
switch (lexer.code_point) {
|
||||
'\r', '\n', 0x2028, 0x2029 => { // Is it a line terminator?
|
||||
// Found the end of the comment line.
|
||||
return; // Stop scanning. Lexer state is ready for the next token.
|
||||
},
|
||||
-1 => {
|
||||
return;
|
||||
}, // EOF? Stop.
|
||||
|
||||
'#', '@' => {
|
||||
if (comptime !is_json) {
|
||||
const pragma_trigger_pos = lexer.end; // Position OF #/@
|
||||
// Use remaining() which starts *after* the consumed #/@
|
||||
const chunk = lexer.remaining();
|
||||
|
||||
const offset = lexer.scanPragma(pragma_trigger_pos, chunk, true);
|
||||
|
||||
if (offset > 0) {
|
||||
// Pragma found (e.g., __PURE__).
|
||||
// Advance current past the pragma's argument text.
|
||||
// 'current' is already after the #/@ trigger.
|
||||
lexer.current += offset;
|
||||
// Do NOT consume the character immediately after the pragma.
|
||||
// Let the main loop find the actual line terminator.
|
||||
|
||||
// Continue the outer loop from the position AFTER the pragma arg.
|
||||
continue;
|
||||
}
|
||||
// If offset == 0, it wasn't a valid pragma start.
|
||||
}
|
||||
// Not a pragma or is_json. Treat #/@ as a normal comment character.
|
||||
// The character was consumed by step(). Let the outer loop continue.
|
||||
continue;
|
||||
},
|
||||
else => {
|
||||
// Non-ASCII (but not LS/PS), etc. Treat as normal comment char.
|
||||
// The character was consumed by step(). Let the outer loop continue.
|
||||
continue;
|
||||
},
|
||||
}
|
||||
} else { // Highway found nothing until EOF
|
||||
// Consume the rest of the line.
|
||||
lexer.end = lexer.source.contents.len;
|
||||
lexer.current = lexer.source.contents.len;
|
||||
lexer.code_point = -1; // Set EOF state
|
||||
return;
|
||||
}
|
||||
}
|
||||
unreachable;
|
||||
}
|
||||
/// Scans the string for a pragma.
|
||||
/// offset is used when there's an issue with the JSX pragma later on.
|
||||
/// Returns the byte length to advance by if found, otherwise 0.
|
||||
fn scanPragma(lexer: *LexerType, offset_for_errors: usize, chunk: string, allow_newline: bool) usize {
|
||||
if (!lexer.has_pure_comment_before) {
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "__PURE__")) {
|
||||
lexer.has_pure_comment_before = true;
|
||||
return "__PURE__".len;
|
||||
}
|
||||
}
|
||||
|
||||
if (strings.hasPrefixWithWordBoundary(chunk, "jsx")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + offset_for_errors, "jsx", chunk, allow_newline)) |span| {
|
||||
lexer.jsx_pragma._jsx = span;
|
||||
return "jsx".len +
|
||||
if (span.range.len > 0) @as(usize, @intCast(span.range.len)) else 0;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxFrag")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + offset_for_errors, "jsxFrag", chunk, allow_newline)) |span| {
|
||||
lexer.jsx_pragma._jsxFrag = span;
|
||||
return "jsxFrag".len +
|
||||
if (span.range.len > 0) @as(usize, @intCast(span.range.len)) else 0;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxRuntime")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + offset_for_errors, "jsxRuntime", chunk, allow_newline)) |span| {
|
||||
lexer.jsx_pragma._jsxRuntime = span;
|
||||
return "jsxRuntime".len +
|
||||
if (span.range.len > 0) @as(usize, @intCast(span.range.len)) else 0;
|
||||
}
|
||||
} else if (strings.hasPrefixWithWordBoundary(chunk, "jsxImportSource")) {
|
||||
if (PragmaArg.scan(.skip_space_first, lexer.start + offset_for_errors, "jsxImportSource", chunk, allow_newline)) |span| {
|
||||
lexer.jsx_pragma._jsxImportSource = span;
|
||||
return "jsxImportSource".len +
|
||||
if (span.range.len > 0) @as(usize, @intCast(span.range.len)) else 0;
|
||||
}
|
||||
} else if (chunk.len >= " sourceMappingURL=".len + 1 and strings.hasPrefixComptime(chunk, " sourceMappingURL=")) { // Check includes space for prefix
|
||||
return PragmaArg.scanSourceMappingURLValue(lexer.start, offset_for_errors, chunk, &lexer.source_mapping_url);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
// TODO: implement this
|
||||
pub fn removeMultilineCommentIndent(_: *LexerType, _: string, text: string) string {
|
||||
return text;
|
||||
@@ -2123,7 +2127,7 @@ fn NewLexer_(
|
||||
return js_ast.E.String.init(try lexer.allocator.dupe(u16, lexer.temp_buffer_u16.items));
|
||||
} else {
|
||||
const result = try lexer.allocator.alloc(u8, lexer.temp_buffer_u16.items.len);
|
||||
strings.copyU16IntoU8(result, []const u16, lexer.temp_buffer_u16.items);
|
||||
strings.copyU16IntoU8(result, lexer.temp_buffer_u16.items);
|
||||
return js_ast.E.String.init(result);
|
||||
}
|
||||
},
|
||||
@@ -3267,7 +3271,48 @@ pub const PragmaArg = enum {
|
||||
no_space_first,
|
||||
skip_space_first,
|
||||
|
||||
pub fn scan(kind: PragmaArg, offset_: usize, pragma: string, text_: string) ?js_ast.Span {
|
||||
pub fn isNewline(c: CodePoint) bool {
|
||||
return c == '\r' or c == '\n' or c == 0x2028 or c == 0x2029;
|
||||
}
|
||||
|
||||
// These can be extremely long, so we use SIMD.
|
||||
/// "//# sourceMappingURL=data:/adspaoksdpkz"
|
||||
/// ^^^^^^^^^^^^^^^^^^
|
||||
pub fn scanSourceMappingURLValue(start: usize, offset_for_errors: usize, chunk: string, result: *?js_ast.Span) usize {
|
||||
const prefix: u32 = " sourceMappingURL=".len;
|
||||
const url_and_rest_of_code = chunk[prefix..]; // Slice containing only the potential argument
|
||||
|
||||
const url_len: usize = brk: {
|
||||
if (bun.strings.indexOfSpaceOrNewlineOrNonASCII(url_and_rest_of_code, 0)) |delimiter_pos_in_arg| {
|
||||
// SIMD found the delimiter at index 'delimiter_pos_in_arg' relative to url start.
|
||||
// The argument's length is exactly this index.
|
||||
break :brk delimiter_pos_in_arg;
|
||||
} else {
|
||||
// SIMD found no delimiter in the entire url.
|
||||
// The argument is the whole chunk.
|
||||
break :brk url_and_rest_of_code.len;
|
||||
}
|
||||
};
|
||||
|
||||
// Now we have the correct argument length (url_len) and the argument text.
|
||||
const url = url_and_rest_of_code[0..url_len];
|
||||
|
||||
// Calculate absolute start location of the argument
|
||||
const absolute_arg_start = start + offset_for_errors + prefix;
|
||||
|
||||
result.* = js_ast.Span{
|
||||
.range = logger.Range{
|
||||
.len = @as(i32, @intCast(url_len)), // Correct length
|
||||
.loc = .{ .start = @as(i32, @intCast(absolute_arg_start)) }, // Correct start
|
||||
},
|
||||
.text = url,
|
||||
};
|
||||
|
||||
// Return total length consumed from the start of the chunk
|
||||
return prefix + url_len; // Correct total length
|
||||
}
|
||||
|
||||
pub fn scan(kind: PragmaArg, offset_: usize, pragma: string, text_: string, allow_newline: bool) ?js_ast.Span {
|
||||
var text = text_[pragma.len..];
|
||||
var iter = strings.CodepointIterator.init(text);
|
||||
|
||||
@@ -3297,7 +3342,7 @@ pub const PragmaArg = enum {
|
||||
}
|
||||
|
||||
var i: usize = 0;
|
||||
while (!isWhitespace(cursor.c)) {
|
||||
while (!isWhitespace(cursor.c) and (!allow_newline or !isNewline(cursor.c))) {
|
||||
i += cursor.width;
|
||||
if (i >= text.len) {
|
||||
break;
|
||||
@@ -3356,28 +3401,5 @@ fn skipToInterestingCharacterInMultilineComment(text_: []const u8) ?u32 {
|
||||
}
|
||||
|
||||
fn indexOfInterestingCharacterInStringLiteral(text_: []const u8, quote: u8) ?usize {
|
||||
var text = text_;
|
||||
const quote_: @Vector(strings.ascii_vector_size, u8) = @splat(@as(u8, quote));
|
||||
const backslash: @Vector(strings.ascii_vector_size, u8) = @splat(@as(u8, '\\'));
|
||||
const V1x16 = strings.AsciiVectorU1;
|
||||
|
||||
while (text.len >= strings.ascii_vector_size) {
|
||||
const vec: strings.AsciiVector = text[0..strings.ascii_vector_size].*;
|
||||
|
||||
const any_significant =
|
||||
@as(V1x16, @bitCast(vec > strings.max_16_ascii)) |
|
||||
@as(V1x16, @bitCast(vec < strings.min_16_ascii)) |
|
||||
@as(V1x16, @bitCast(quote_ == vec)) |
|
||||
@as(V1x16, @bitCast(backslash == vec));
|
||||
|
||||
if (@reduce(.Max, any_significant) > 0) {
|
||||
const bitmask = @as(u16, @bitCast(any_significant));
|
||||
const first = @ctz(bitmask);
|
||||
bun.assert(first < strings.ascii_vector_size);
|
||||
return first + (@intFromPtr(text.ptr) - @intFromPtr(text_.ptr));
|
||||
}
|
||||
text = text[strings.ascii_vector_size..];
|
||||
}
|
||||
|
||||
return null;
|
||||
return bun.highway.indexOfInterestingCharacterInStringLiteral(text_, quote);
|
||||
}
|
||||
|
||||
@@ -165,7 +165,7 @@ pub fn estimateLengthForUTF8(input: []const u8, comptime ascii_only: bool, compt
|
||||
var remaining = input;
|
||||
var len: usize = 2; // for quotes
|
||||
|
||||
while (strings.indexOfNeedsEscape(remaining, quote_char)) |i| {
|
||||
while (strings.indexOfNeedsEscapeForJavaScriptString(remaining, quote_char)) |i| {
|
||||
len += i;
|
||||
remaining = remaining[i..];
|
||||
const char_len = strings.wtf8ByteSequenceLengthWithInvalid(remaining[0]);
|
||||
@@ -249,7 +249,7 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
|
||||
|
||||
switch (encoding) {
|
||||
.ascii, .utf8 => {
|
||||
if (strings.indexOfNeedsEscape(remain, quote_char)) |j| {
|
||||
if (strings.indexOfNeedsEscapeForJavaScriptString(remain, quote_char)) |j| {
|
||||
const text_chunk = text[i .. i + clamped_width];
|
||||
try writer.writeAll(text_chunk);
|
||||
i += clamped_width;
|
||||
|
||||
@@ -95,60 +95,6 @@ fn StackStack(comptime T: type, comptime SizeType: type, comptime N: SizeType) t
|
||||
};
|
||||
}
|
||||
|
||||
/// This may have false positives but it is fast
|
||||
fn fastDetect(src: []const u8) bool {
|
||||
var has_open = false;
|
||||
var has_close = false;
|
||||
if (src.len < 16) {
|
||||
for (src) |char| {
|
||||
switch (char) {
|
||||
'{' => {
|
||||
has_open = true;
|
||||
},
|
||||
'}' => {
|
||||
has_close = true;
|
||||
},
|
||||
}
|
||||
if (has_close and has_close) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const needles = comptime [2]@Vector(16, u8){
|
||||
@splat('{'),
|
||||
@splat('}'),
|
||||
@splat('"'),
|
||||
};
|
||||
|
||||
const i: usize = 0;
|
||||
while (i + 16 <= src.len) {
|
||||
const haystack = src[i .. i + 16].*;
|
||||
if (std.simd.firstTrue(needles[0] == haystack)) {
|
||||
has_open = true;
|
||||
}
|
||||
if (std.simd.firstTrue(needles[1] == haystack)) {
|
||||
has_close = true;
|
||||
}
|
||||
if (has_open and has_close) return true;
|
||||
}
|
||||
|
||||
if (i < src.len) {
|
||||
for (src) |char| {
|
||||
switch (char) {
|
||||
'{' => {
|
||||
has_open = true;
|
||||
},
|
||||
'}' => {
|
||||
has_close = true;
|
||||
},
|
||||
}
|
||||
if (has_close and has_open) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const ExpandError = StackError || ParserError;
|
||||
|
||||
/// `out` is preallocated by using the result from `calculateExpandedAmount`
|
||||
|
||||
@@ -3563,26 +3563,10 @@ var stderr_mutex = bun.Mutex{};
|
||||
|
||||
pub fn hasEqSign(str: []const u8) ?u32 {
|
||||
if (isAllAscii(str)) {
|
||||
if (str.len < 16)
|
||||
return hasEqSignAsciiSlow(str);
|
||||
|
||||
const needles: @Vector(16, u8) = @splat('=');
|
||||
|
||||
var i: u32 = 0;
|
||||
while (i + 16 <= str.len) : (i += 16) {
|
||||
const haystack = str[i..][0..16].*;
|
||||
const result = haystack == needles;
|
||||
|
||||
if (std.simd.firstTrue(result)) |idx| {
|
||||
return @intCast(i + idx);
|
||||
}
|
||||
}
|
||||
|
||||
return i + (hasEqSignAsciiSlow(str[i..]) orelse return null);
|
||||
return bun.strings.indexOfChar(str, '=');
|
||||
}
|
||||
|
||||
// TODO actually i think that this can also use the simd stuff
|
||||
|
||||
var iter = CodepointIterator.init(str);
|
||||
var cursor = CodepointIterator.Cursor{};
|
||||
while (iter.next(&cursor)) {
|
||||
@@ -3594,11 +3578,6 @@ pub fn hasEqSign(str: []const u8) ?u32 {
|
||||
return null;
|
||||
}
|
||||
|
||||
pub fn hasEqSignAsciiSlow(str: []const u8) ?u32 {
|
||||
for (str, 0..) |c, i| if (c == '=') return @intCast(i);
|
||||
return null;
|
||||
}
|
||||
|
||||
pub const CmdEnvIter = struct {
|
||||
env: *const bun.StringArrayHashMap([:0]const u8),
|
||||
iter: bun.StringArrayHashMap([:0]const u8).Iterator,
|
||||
|
||||
@@ -96,43 +96,14 @@ fn literalLength(comptime T: type, comptime str: string) usize {
|
||||
|
||||
pub const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
|
||||
pub fn indexOfAny(slice: string, comptime str: []const u8) ?OptionalUsize {
|
||||
switch (comptime str.len) {
|
||||
return switch (comptime str.len) {
|
||||
0 => @compileError("str cannot be empty"),
|
||||
1 => return indexOfChar(slice, str[0]),
|
||||
else => {},
|
||||
}
|
||||
|
||||
var remaining = slice;
|
||||
if (remaining.len == 0) return null;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
var cmp: AsciiVectorU1 = @bitCast(vec == @as(AsciiVector, @splat(@as(u8, str[0]))));
|
||||
inline for (str[1..]) |c| {
|
||||
cmp |= @bitCast(vec == @as(AsciiVector, @splat(@as(u8, c))));
|
||||
}
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
|
||||
const first = @ctz(bitmask);
|
||||
|
||||
return @as(OptionalUsize, @intCast(first + slice.len - remaining.len));
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert) assert(remaining.len < ascii_vector_size);
|
||||
}
|
||||
|
||||
for (remaining, 0..) |c, i| {
|
||||
if (strings.indexOfChar(str, c) != null) {
|
||||
return @as(OptionalUsize, @intCast(i + slice.len - remaining.len));
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
else => if (bun.highway.indexOfAnyChar(slice, str)) |i|
|
||||
@intCast(i)
|
||||
else
|
||||
null,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn indexOfAny16(self: []const u16, comptime str: anytype) ?OptionalUsize {
|
||||
@@ -177,7 +148,7 @@ pub fn inMapCaseInsensitive(self: []const u8, comptime ComptimeStringMap: anytyp
|
||||
return bun.String.ascii(self).inMapCaseInsensitive(ComptimeStringMap);
|
||||
}
|
||||
|
||||
pub inline fn containsAny(in: anytype, target: string) bool {
|
||||
pub inline fn containsAny(in: anytype, target: anytype) bool {
|
||||
for (in) |str| if (contains(if (@TypeOf(str) == u8) &[1]u8{str} else bun.span(str), target)) return true;
|
||||
return false;
|
||||
}
|
||||
@@ -496,7 +467,7 @@ pub inline fn lastIndexOf(self: string, str: string) ?usize {
|
||||
return std.mem.lastIndexOf(u8, self, str);
|
||||
}
|
||||
|
||||
pub inline fn indexOf(self: string, str: string) ?usize {
|
||||
pub fn indexOf(self: string, str: string) ?usize {
|
||||
if (comptime !bun.Environment.isNative) {
|
||||
return std.mem.indexOf(u8, self, str);
|
||||
}
|
||||
@@ -990,16 +961,13 @@ pub fn endsWithAnyComptime(self: string, comptime str: string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn eql(self: string, other: anytype) bool {
|
||||
pub fn eql(self: string, other: []const u8) bool {
|
||||
if (self.len != other.len) return false;
|
||||
if (comptime @TypeOf(other) == *string) {
|
||||
return eql(self, other.*);
|
||||
}
|
||||
|
||||
for (self, 0..) |c, i| {
|
||||
if (other[i] != c) return false;
|
||||
}
|
||||
return true;
|
||||
return eqlLong(self, other, false);
|
||||
}
|
||||
|
||||
pub fn eqlComptimeT(comptime T: type, self: []const T, comptime alt: anytype) bool {
|
||||
@@ -1367,43 +1335,11 @@ pub fn copyU8IntoU16WithAlignment(comptime alignment: u21, output_: []align(alig
|
||||
// }
|
||||
// }
|
||||
|
||||
pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: InputType) void {
|
||||
if (comptime Environment.allow_assert) assert(input_.len <= output_.len);
|
||||
var output = output_;
|
||||
var input = input_;
|
||||
pub inline fn copyU16IntoU8(output: []u8, input: []align(1) const u16) void {
|
||||
if (comptime Environment.allow_assert) assert(input.len <= output.len);
|
||||
const count = @min(input.len, output.len);
|
||||
|
||||
// https://zig.godbolt.org/z/9rTn1orcY
|
||||
|
||||
const group = @as(usize, 16);
|
||||
// end at the last group of 16 bytes
|
||||
var input_ptr = input.ptr;
|
||||
var output_ptr = output.ptr;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
const end_len = (@min(input.len, output.len) & ~(group - 1));
|
||||
const last_vector_ptr = input.ptr + end_len;
|
||||
while (last_vector_ptr != input_ptr) {
|
||||
const input_vec1: @Vector(group, u16) = input_ptr[0..group].*;
|
||||
inline for (0..group) |i| {
|
||||
output_ptr[i] = @as(u8, @truncate(input_vec1[i]));
|
||||
}
|
||||
|
||||
output_ptr += group;
|
||||
input_ptr += group;
|
||||
}
|
||||
|
||||
input.len -= end_len;
|
||||
output.len -= end_len;
|
||||
}
|
||||
|
||||
const last_input_ptr = input_ptr + @min(input.len, output.len);
|
||||
|
||||
while (last_input_ptr != input_ptr) {
|
||||
output_ptr[0] = @as(u8, @truncate(input_ptr[0]));
|
||||
output_ptr += 1;
|
||||
input_ptr += 1;
|
||||
}
|
||||
bun.highway.copyU16ToU8(input[0..count], output[0..count]);
|
||||
}
|
||||
|
||||
const strings = @This();
|
||||
@@ -2353,11 +2289,7 @@ pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf1
|
||||
}
|
||||
list.items.len += i;
|
||||
|
||||
copyU16IntoU8(
|
||||
list.items[list.items.len - i ..],
|
||||
Type,
|
||||
to_copy,
|
||||
);
|
||||
copyU16IntoU8(list.items[list.items.len - i ..], to_copy);
|
||||
|
||||
if (comptime skip_trailing_replacement) {
|
||||
if (replacement.is_lead and utf16_remaining.len == 0) {
|
||||
@@ -2377,7 +2309,7 @@ pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf1
|
||||
try list.ensureTotalCapacityPrecise(utf16_remaining.len + list.items.len);
|
||||
const old_len = list.items.len;
|
||||
list.items.len += utf16_remaining.len;
|
||||
copyU16IntoU8(list.items[old_len..], Type, utf16_remaining);
|
||||
copyU16IntoU8(list.items[old_len..], utf16_remaining);
|
||||
}
|
||||
|
||||
log("UTF16 {d} -> {d} UTF8", .{ utf16.len, list.items.len });
|
||||
@@ -2794,43 +2726,8 @@ pub fn replaceLatin1WithUTF8(buf_: []u8) void {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize {
|
||||
// https://zig.godbolt.org/z/zzYexPPs9
|
||||
|
||||
var latin1 = latin1_;
|
||||
const input_len = latin1.len;
|
||||
var total_non_ascii_count: usize = 0;
|
||||
|
||||
// This is about 30% faster on large input compared to auto-vectorization
|
||||
if (comptime Environment.enableSIMD) {
|
||||
const end = latin1.ptr + (latin1.len - (latin1.len % ascii_vector_size));
|
||||
while (latin1.ptr != end) {
|
||||
const vec: AsciiVector = latin1[0..ascii_vector_size].*;
|
||||
|
||||
// Shifting a unsigned 8 bit integer to the right by 7 bits always produces a value of 0 or 1.
|
||||
const cmp = vec >> @as(AsciiVector, @splat(
|
||||
@as(u8, 7),
|
||||
));
|
||||
|
||||
// Anding that value rather than converting it into a @Vector(16, u1) produces better code from LLVM.
|
||||
const mask: AsciiVector = cmp & @as(AsciiVector, @splat(
|
||||
@as(u8, 1),
|
||||
));
|
||||
|
||||
total_non_ascii_count += @as(usize, @reduce(.Add, mask));
|
||||
latin1 = latin1[ascii_vector_size..];
|
||||
}
|
||||
|
||||
// an important hint to the compiler to not auto-vectorize the loop below
|
||||
if (latin1.len >= ascii_vector_size) unreachable;
|
||||
}
|
||||
|
||||
for (latin1) |c| {
|
||||
total_non_ascii_count += @as(usize, @intFromBool(c > 127));
|
||||
}
|
||||
|
||||
// each non-ascii latin1 character becomes 2 UTF8 characters
|
||||
return input_len + total_non_ascii_count;
|
||||
pub fn elementLengthLatin1IntoUTF8(slice: []const u8) usize {
|
||||
return bun.simdutf.length.utf8.from.latin1(slice);
|
||||
}
|
||||
|
||||
pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
|
||||
@@ -2865,20 +2762,7 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
|
||||
return latin1_.len;
|
||||
}
|
||||
|
||||
var count: usize = 0;
|
||||
var latin1 = latin1_;
|
||||
while (latin1.len > 0) {
|
||||
const function = comptime if (std.meta.Child(Type) == u8) strings.firstNonASCIIWithType else strings.firstNonASCII16;
|
||||
const to_write = function(Type, latin1) orelse @as(u32, @truncate(latin1.len));
|
||||
count += to_write;
|
||||
latin1 = latin1[to_write..];
|
||||
if (latin1.len > 0) {
|
||||
count += comptime if (std.meta.Child(Type) == u8) 2 else 1;
|
||||
latin1 = latin1[1..];
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
return bun.simdutf.length.utf16.from.latin1(latin1_);
|
||||
}
|
||||
|
||||
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) !Escaped(u8) {
|
||||
@@ -3605,7 +3489,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
|
||||
|
||||
while (firstNonASCII16(Type, utf16_remaining)) |i| {
|
||||
const end = @min(i, remaining.len);
|
||||
if (end > 0) copyU16IntoU8(remaining, Type, utf16_remaining[0..end]);
|
||||
if (end > 0) copyU16IntoU8(remaining, utf16_remaining[0..end]);
|
||||
remaining = remaining[end..];
|
||||
utf16_remaining = utf16_remaining[end..];
|
||||
|
||||
@@ -3674,7 +3558,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
|
||||
|
||||
if (remaining.len > 0 and !ended_on_non_ascii and utf16_remaining.len > 0) {
|
||||
const len = @min(remaining.len, utf16_remaining.len);
|
||||
copyU16IntoU8(remaining[0..len], Type, utf16_remaining[0..len]);
|
||||
copyU16IntoU8(remaining[0..len], utf16_remaining[0..len]);
|
||||
utf16_remaining = utf16_remaining[len..];
|
||||
remaining = remaining[len..];
|
||||
}
|
||||
@@ -4014,44 +3898,7 @@ pub fn isAllASCII(slice: []const u8) bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (bun.FeatureFlags.use_simdutf)
|
||||
return bun.simdutf.validate.ascii(slice);
|
||||
|
||||
var remaining = slice;
|
||||
|
||||
// The NEON SIMD unit is 128-bit wide and includes 16 128-bit registers that can be used as 32 64-bit registers
|
||||
if (comptime Environment.enableSIMD) {
|
||||
const remaining_end_ptr = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size);
|
||||
while (remaining.ptr != remaining_end_ptr) : (remaining.ptr += ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
|
||||
if (@reduce(.Max, vec) > 127) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Int = u64;
|
||||
const size = @sizeOf(Int);
|
||||
const remaining_last8 = slice.ptr + slice.len - (slice.len % size);
|
||||
while (remaining.ptr != remaining_last8) : (remaining.ptr += size) {
|
||||
const bytes = @as(Int, @bitCast(remaining[0..size].*));
|
||||
// https://dotat.at/@/2022-06-27-tolower-swar.html
|
||||
const mask = bytes & 0x8080808080808080;
|
||||
|
||||
if (mask > 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const final = slice.ptr + slice.len;
|
||||
while (remaining.ptr != final) : (remaining.ptr += 1) {
|
||||
if (remaining[0] > 127) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return bun.simdutf.validate.ascii(slice);
|
||||
}
|
||||
|
||||
// #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
|
||||
@@ -4085,296 +3932,67 @@ pub inline fn u16GetSupplementary(lead: u32, trail: u32) u32 {
|
||||
pub const u16_surrogate_offset = 56613888;
|
||||
|
||||
pub fn firstNonASCII(slice: []const u8) ?u32 {
|
||||
return firstNonASCIIWithType([]const u8, slice);
|
||||
}
|
||||
|
||||
pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
|
||||
var remaining = slice;
|
||||
|
||||
if (comptime bun.FeatureFlags.use_simdutf) {
|
||||
const result = bun.simdutf.validate.with_errors.ascii(slice);
|
||||
if (result.status == .success) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return @as(u32, @truncate(result.count));
|
||||
}
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
if (remaining.len >= ascii_vector_size) {
|
||||
const remaining_start = remaining.ptr;
|
||||
const remaining_end = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size);
|
||||
|
||||
while (remaining.ptr != remaining_end) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
|
||||
if (@reduce(.Max, vec) > 127) {
|
||||
const Int = u64;
|
||||
const size = @sizeOf(Int);
|
||||
remaining.len -= @intFromPtr(remaining.ptr) - @intFromPtr(remaining_start);
|
||||
|
||||
{
|
||||
const bytes = @as(Int, @bitCast(remaining[0..size].*));
|
||||
// https://dotat.at/@/2022-06-27-tolower-swar.html
|
||||
const mask = bytes & 0x8080808080808080;
|
||||
|
||||
if (mask > 0) {
|
||||
const first_set_byte = @ctz(mask) / 8;
|
||||
if (comptime Environment.isDebug) {
|
||||
bun.assert(remaining[first_set_byte] > 127);
|
||||
for (0..first_set_byte) |j| {
|
||||
bun.assert(remaining[j] <= 127);
|
||||
}
|
||||
}
|
||||
|
||||
return @as(u32, first_set_byte) + @as(u32, @intCast(slice.len - remaining.len));
|
||||
}
|
||||
remaining = remaining[size..];
|
||||
}
|
||||
{
|
||||
const bytes = @as(Int, @bitCast(remaining[0..size].*));
|
||||
const mask = bytes & 0x8080808080808080;
|
||||
|
||||
if (mask > 0) {
|
||||
const first_set_byte = @ctz(mask) / 8;
|
||||
if (comptime Environment.isDebug) {
|
||||
bun.assert(remaining[first_set_byte] > 127);
|
||||
for (0..first_set_byte) |j| {
|
||||
bun.assert(remaining[j] <= 127);
|
||||
}
|
||||
}
|
||||
|
||||
return @as(u32, first_set_byte) + @as(u32, @intCast(slice.len - remaining.len));
|
||||
}
|
||||
}
|
||||
unreachable;
|
||||
}
|
||||
|
||||
// the more intuitive way, using slices, produces worse codegen
|
||||
// specifically: it subtracts the length at the end of the loop
|
||||
// we don't need to do that
|
||||
// we only need to subtract the length once at the very end
|
||||
remaining.ptr += ascii_vector_size;
|
||||
}
|
||||
remaining.len -= @intFromPtr(remaining.ptr) - @intFromPtr(remaining_start);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const Int = u64;
|
||||
const size = @sizeOf(Int);
|
||||
const remaining_start = remaining.ptr;
|
||||
const remaining_end = remaining.ptr + remaining.len - (remaining.len % size);
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
// these assertions exist more so for LLVM
|
||||
bun.unsafeAssert(remaining.len < ascii_vector_size);
|
||||
bun.unsafeAssert(@intFromPtr(remaining.ptr + ascii_vector_size) > @intFromPtr(remaining_end));
|
||||
}
|
||||
|
||||
if (remaining.len >= size) {
|
||||
while (remaining.ptr != remaining_end) {
|
||||
const bytes = @as(Int, @bitCast(remaining[0..size].*));
|
||||
// https://dotat.at/@/2022-06-27-tolower-swar.html
|
||||
const mask = bytes & 0x8080808080808080;
|
||||
|
||||
if (mask > 0) {
|
||||
remaining.len -= @intFromPtr(remaining.ptr) - @intFromPtr(remaining_start);
|
||||
const first_set_byte = @ctz(mask) / 8;
|
||||
if (comptime Environment.isDebug) {
|
||||
bun.unsafeAssert(remaining[first_set_byte] > 127);
|
||||
for (0..first_set_byte) |j| {
|
||||
bun.unsafeAssert(remaining[j] <= 127);
|
||||
}
|
||||
}
|
||||
|
||||
return @as(u32, first_set_byte) + @as(u32, @intCast(slice.len - remaining.len));
|
||||
}
|
||||
|
||||
remaining.ptr += size;
|
||||
}
|
||||
remaining.len -= @intFromPtr(remaining.ptr) - @intFromPtr(remaining_start);
|
||||
}
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert) assert(remaining.len < 8);
|
||||
|
||||
for (remaining) |*char| {
|
||||
if (char.* > 127) {
|
||||
// try to prevent it from reading the length of the slice
|
||||
return @as(u32, @truncate(@intFromPtr(char) - @intFromPtr(slice.ptr)));
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
pub fn indexOfNewlineOrNonASCIIOrANSI(slice_: []const u8, offset: u32) ?u32 {
|
||||
const slice = slice_[offset..];
|
||||
var remaining = slice;
|
||||
|
||||
if (remaining.len == 0)
|
||||
const result = bun.simdutf.validate.with_errors.ascii(slice);
|
||||
if (result.status == .success) {
|
||||
return null;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\r'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\n'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\x1b')))));
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
|
||||
const first = @ctz(bitmask);
|
||||
|
||||
return @as(u32, first) + @as(u32, @intCast(slice.len - remaining.len)) + offset;
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert) assert(remaining.len < ascii_vector_size);
|
||||
}
|
||||
|
||||
for (remaining) |*char_| {
|
||||
const char = char_.*;
|
||||
if (char > 127 or char < 0x20 or char == '\n' or char == '\r' or char == '\x1b') {
|
||||
return @as(u32, @truncate((@intFromPtr(char_) - @intFromPtr(slice.ptr)))) + offset;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return @as(u32, @truncate(result.count));
|
||||
}
|
||||
|
||||
pub const indexOfNewlineOrNonASCIIOrANSI = indexOfNewlineOrNonASCII;
|
||||
|
||||
/// Checks if slice[offset..] has any < 0x20 or > 127 characters
|
||||
pub fn indexOfNewlineOrNonASCII(slice_: []const u8, offset: u32) ?u32 {
|
||||
return indexOfNewlineOrNonASCIICheckStart(slice_, offset, true);
|
||||
}
|
||||
|
||||
pub fn indexOfSpaceOrNewlineOrNonASCII(slice_: []const u8, offset: u32) ?u32 {
|
||||
const slice = slice_[offset..];
|
||||
const remaining = slice;
|
||||
|
||||
if (remaining.len == 0)
|
||||
return null;
|
||||
|
||||
if (remaining[0] > 127 or (remaining[0] < 0x20 and remaining[0] != 0x09)) {
|
||||
return offset;
|
||||
}
|
||||
|
||||
const i = bun.highway.indexOfSpaceOrNewlineOrNonASCII(remaining) orelse return null;
|
||||
return @as(u32, @truncate(i)) + offset;
|
||||
}
|
||||
|
||||
pub fn indexOfNewlineOrNonASCIICheckStart(slice_: []const u8, offset: u32, comptime check_start: bool) ?u32 {
|
||||
const slice = slice_[offset..];
|
||||
var remaining = slice;
|
||||
const remaining = slice;
|
||||
|
||||
if (remaining.len == 0)
|
||||
return null;
|
||||
|
||||
if (comptime check_start) {
|
||||
// this shows up in profiling
|
||||
if (remaining[0] > 127 or remaining[0] < 0x20 or remaining[0] == '\r' or remaining[0] == '\n') {
|
||||
if (remaining[0] > 127 or (remaining[0] < 0x20 and remaining[0] != 0x09)) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\r'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\n')))));
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
|
||||
const first = @ctz(bitmask);
|
||||
|
||||
return @as(u32, first) + @as(u32, @intCast(slice.len - remaining.len)) + offset;
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert) assert(remaining.len < ascii_vector_size);
|
||||
}
|
||||
|
||||
for (remaining) |*char_| {
|
||||
const char = char_.*;
|
||||
if (char > 127 or char < 0x20 or char == '\n' or char == '\r') {
|
||||
return @as(u32, @truncate((@intFromPtr(char_) - @intFromPtr(slice.ptr)))) + offset;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
const i = bun.highway.indexOfNewlineOrNonASCII(remaining) orelse return null;
|
||||
return @as(u32, @truncate(i)) + offset;
|
||||
}
|
||||
|
||||
pub fn containsNewlineOrNonASCIIOrQuote(slice_: []const u8) bool {
|
||||
const slice = slice_;
|
||||
var remaining = slice;
|
||||
|
||||
if (remaining.len == 0)
|
||||
return false;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\r'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\n'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '"')))));
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
}
|
||||
|
||||
if (comptime Environment.allow_assert) assert(remaining.len < ascii_vector_size);
|
||||
}
|
||||
|
||||
for (remaining) |*char_| {
|
||||
const char = char_.*;
|
||||
if (char > 127 or char < 0x20 or char == '\n' or char == '\r' or char == '"') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
pub fn containsNewlineOrNonASCIIOrQuote(text: []const u8) bool {
|
||||
return bun.highway.containsNewlineOrNonASCIIOrQuote(text);
|
||||
}
|
||||
|
||||
/// JSON escape
|
||||
pub fn indexOfNeedsEscape(slice: []const u8, comptime quote_char: u8) ?u32 {
|
||||
var remaining = slice;
|
||||
if (remaining.len == 0)
|
||||
/// Supports:
|
||||
/// - `"`
|
||||
/// - `'`
|
||||
/// - "`"
|
||||
pub fn indexOfNeedsEscapeForJavaScriptString(slice: []const u8, quote_char: u8) ?u32 {
|
||||
if (slice.len == 0)
|
||||
return null;
|
||||
|
||||
if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == quote_char or (quote_char == '`' and remaining[0] == '$')) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp: AsciiVectorU1 = if (comptime quote_char == '`') ( //
|
||||
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '$'))))) //
|
||||
) else ( //
|
||||
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) //
|
||||
);
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
|
||||
const first = @ctz(bitmask);
|
||||
|
||||
return @as(u32, first) + @as(u32, @truncate(@intFromPtr(remaining.ptr) - @intFromPtr(slice.ptr)));
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
}
|
||||
}
|
||||
|
||||
for (remaining) |*char_| {
|
||||
const char = char_.*;
|
||||
if (char > 127 or char < 0x20 or char == '\\' or char == quote_char or (quote_char == '`' and char == '$')) {
|
||||
return @as(u32, @truncate(@intFromPtr(char_) - @intFromPtr(slice.ptr)));
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return bun.highway.indexOfNeedsEscapeForJavaScriptString(slice, quote_char);
|
||||
}
|
||||
|
||||
pub fn indexOfNeedsURLEncode(slice: []const u8) ?u32 {
|
||||
@@ -4447,15 +4065,7 @@ pub fn indexOfNeedsURLEncode(slice: []const u8) ?u32 {
|
||||
}
|
||||
|
||||
pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 {
|
||||
const ptr = bun.C.strchr(sliceZ.ptr, char) orelse return null;
|
||||
const pos = @intFromPtr(ptr) - @intFromPtr(sliceZ.ptr);
|
||||
|
||||
if (comptime Environment.isDebug)
|
||||
bun.assert(@intFromPtr(sliceZ.ptr) <= @intFromPtr(ptr) and
|
||||
@intFromPtr(ptr) < @intFromPtr(sliceZ.ptr + sliceZ.len) and
|
||||
pos <= sliceZ.len);
|
||||
|
||||
return @as(u63, @truncate(pos));
|
||||
return @truncate(bun.highway.indexOfChar(sliceZ, char) orelse return null);
|
||||
}
|
||||
|
||||
pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
|
||||
@@ -4463,19 +4073,11 @@ pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
|
||||
}
|
||||
|
||||
pub fn indexOfCharUsize(slice: []const u8, char: u8) ?usize {
|
||||
if (slice.len == 0)
|
||||
return null;
|
||||
|
||||
if (comptime !Environment.isNative) {
|
||||
return std.mem.indexOfScalar(u8, slice, char);
|
||||
}
|
||||
|
||||
const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null;
|
||||
const i = @intFromPtr(ptr) - @intFromPtr(slice.ptr);
|
||||
bun.assert(i < slice.len);
|
||||
bun.assert(slice[i] == char);
|
||||
|
||||
return i;
|
||||
return bun.highway.indexOfChar(slice, char);
|
||||
}
|
||||
|
||||
pub fn indexOfCharPos(slice: []const u8, char: u8, start_index: usize) ?usize {
|
||||
@@ -4485,13 +4087,9 @@ pub fn indexOfCharPos(slice: []const u8, char: u8, start_index: usize) ?usize {
|
||||
|
||||
if (start_index >= slice.len) return null;
|
||||
|
||||
const ptr = bun.C.memchr(slice.ptr + start_index, char, slice.len - start_index) orelse
|
||||
return null;
|
||||
const i = @intFromPtr(ptr) - @intFromPtr(slice.ptr);
|
||||
bun.assert(i < slice.len);
|
||||
bun.assert(slice[i] == char);
|
||||
|
||||
return i;
|
||||
const result = bun.highway.indexOfChar(slice[start_index..], char) orelse return null;
|
||||
bun.debugAssert(slice.len > result + start_index);
|
||||
return result + start_index;
|
||||
}
|
||||
|
||||
pub fn indexOfAnyPosComptime(slice: []const u8, comptime chars: []const u8, start_index: usize) ?usize {
|
||||
@@ -4934,47 +4532,6 @@ pub fn firstNonASCII16(comptime Slice: type, slice: Slice) ?u32 {
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Fast path for printing template literal strings
|
||||
pub fn @"nextUTF16NonASCIIOr$`\\"(
|
||||
comptime Slice: type,
|
||||
slice: Slice,
|
||||
) ?u32 {
|
||||
var remaining = slice;
|
||||
|
||||
if (comptime Environment.enableSIMD and Environment.isNative) {
|
||||
while (remaining.len >= ascii_u16_vector_size) {
|
||||
const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
|
||||
|
||||
const cmp = @as(AsciiVectorU16U1, @bitCast((vec > max_u16_ascii))) |
|
||||
@as(AsciiVectorU16U1, @bitCast((vec < min_u16_ascii))) |
|
||||
@as(AsciiVectorU16U1, @bitCast((vec == @as(AsciiU16Vector, @splat(@as(u16, '$')))))) |
|
||||
@as(AsciiVectorU16U1, @bitCast((vec == @as(AsciiU16Vector, @splat(@as(u16, '`')))))) |
|
||||
@as(AsciiVectorU16U1, @bitCast((vec == @as(AsciiU16Vector, @splat(@as(u16, '\\'))))));
|
||||
|
||||
const bitmask = @as(u8, @bitCast(cmp));
|
||||
const first = @ctz(bitmask);
|
||||
if (first < ascii_u16_vector_size) {
|
||||
return @as(u32, @intCast(@as(u32, first) +
|
||||
@as(u32, @intCast(slice.len - remaining.len))));
|
||||
}
|
||||
|
||||
remaining = remaining[ascii_u16_vector_size..];
|
||||
}
|
||||
}
|
||||
|
||||
for (remaining, 0..) |char, i| {
|
||||
switch (char) {
|
||||
'$', '`', '\\', 0...0x20 - 1, 128...std.math.maxInt(u16) => {
|
||||
return @as(u32, @truncate(i + (slice.len - remaining.len)));
|
||||
},
|
||||
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Convert potentially ill-formed UTF-8 or UTF-16 bytes to a Unicode Codepoint.
|
||||
/// - Invalid codepoints are replaced with `zero` parameter
|
||||
/// - Null bytes return 0
|
||||
@@ -5097,31 +4654,6 @@ pub fn lengthOfLeadingWhitespaceASCII(slice: string) usize {
|
||||
return slice.len;
|
||||
}
|
||||
|
||||
pub fn containsNonBmpCodePointUTF16(_text: []const u16) bool {
|
||||
const n = _text.len;
|
||||
if (n > 0) {
|
||||
var i: usize = 0;
|
||||
const text = _text[0 .. n - 1];
|
||||
while (i < n - 1) : (i += 1) {
|
||||
switch (text[i]) {
|
||||
// Check for a high surrogate
|
||||
0xD800...0xDBFF => {
|
||||
// Check for a low surrogate
|
||||
switch (text[i + 1]) {
|
||||
0xDC00...0xDFFF => {
|
||||
return true;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Allocator) !string {
|
||||
return try std.mem.join(allocator, delimiter, slices);
|
||||
}
|
||||
@@ -5238,6 +4770,75 @@ pub fn NewCodePointIterator(comptime CodePointType_: type, comptime zeroValue: c
|
||||
return Iterator{ .bytes = str, .i = i, .c = zeroValue };
|
||||
}
|
||||
|
||||
const SkipResult = enum {
|
||||
eof,
|
||||
found,
|
||||
not_found,
|
||||
};
|
||||
|
||||
/// Advance forward until the scalar function returns true.
|
||||
/// THe simd function is "best effort" and expected to sometimes return a result which `scalar` will return false for.
|
||||
/// This is because we don't decode UTF-8 in the SIMD code path.
|
||||
pub fn skip(it: *const Iterator, cursor: *Cursor, simd: *const fn (input: []const u8) ?usize, scalar: *const fn (CodePointType) bool) SkipResult {
|
||||
while (true) {
|
||||
// 1. Get current position. Check for EOF.
|
||||
const current_byte_index = cursor.i;
|
||||
if (current_byte_index >= it.bytes.len) {
|
||||
return .not_found; // Reached end without finding
|
||||
}
|
||||
|
||||
// 2. Decode the *next* character using the standard iterator method.
|
||||
if (!next(it, cursor)) {
|
||||
return .not_found; // Reached end or error during decode
|
||||
}
|
||||
|
||||
// 3. Check if the character just decoded matches the scalar condition.
|
||||
if (scalar(it.c)) {
|
||||
return .found; // Found it!
|
||||
}
|
||||
|
||||
// 4. Optimization: Can we skip ahead using SIMD?
|
||||
// Scan starting from the byte *after* the character we just decoded.
|
||||
const next_scan_start_index = cursor.i;
|
||||
if (next_scan_start_index >= it.bytes.len) {
|
||||
// Just decoded the last character and it didn't match.
|
||||
return .not_found;
|
||||
}
|
||||
const remaining_slice = it.bytes[next_scan_start_index..];
|
||||
if (remaining_slice.len == 0) {
|
||||
return .not_found;
|
||||
}
|
||||
|
||||
// Ask SIMD for the next potential candidate.
|
||||
if (simd(remaining_slice)) |pos| {
|
||||
// SIMD found a potential candidate `pos` bytes ahead.
|
||||
if (pos > 0) {
|
||||
// Jump the byte index to the start of the potential candidate.
|
||||
cursor.i = next_scan_start_index + @as(u32, @intCast(pos));
|
||||
// Reset width so next() decodes correctly from the jumped position.
|
||||
cursor.width = 0;
|
||||
// Loop will continue, starting the decode from the new cursor.i.
|
||||
continue;
|
||||
}
|
||||
// If pos == 0, SIMD suggests the *immediate next* character.
|
||||
// No jump needed, just let the loop iterate naturally.
|
||||
// Fallthrough to the end of the loop.
|
||||
} else {
|
||||
// SIMD found no potential candidates in the rest of the string.
|
||||
// Since the SIMD search set is a superset of the scalar check set,
|
||||
// we can guarantee that no character satisfying `scalar` exists further.
|
||||
// Since the current character (decoded in step 2) also didn't match,
|
||||
// we can conclude the target character is not found.
|
||||
return .not_found;
|
||||
}
|
||||
|
||||
// If we reach here, it means SIMD returned pos=0.
|
||||
// Loop continues to the next iteration, processing the immediate next char.
|
||||
} // End while true
|
||||
|
||||
unreachable;
|
||||
}
|
||||
|
||||
pub inline fn next(it: *const Iterator, cursor: *Cursor) bool {
|
||||
const pos: u32 = @as(u32, cursor.width) + cursor.i;
|
||||
if (pos >= it.bytes.len) {
|
||||
@@ -5527,6 +5128,16 @@ pub fn leftHasAnyInRight(to_check: []const string, against: []const string) bool
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if the input has the prefix and the next character is not an identifier character
|
||||
/// Also returns true if the input ends with the prefix (i.e. EOF)
|
||||
///
|
||||
/// Example:
|
||||
/// ```zig
|
||||
/// // returns true
|
||||
/// hasPrefixWithWordBoundary("console.log", "console") // true
|
||||
/// hasPrefixWithWordBoundary("console.log", "log") // false
|
||||
/// hasPrefixWithWordBoundary("console.log", "console.log") // true
|
||||
/// ```
|
||||
pub fn hasPrefixWithWordBoundary(input: []const u8, comptime prefix: []const u8) bool {
|
||||
if (hasPrefixComptime(input, prefix)) {
|
||||
if (input.len == prefix.len) return true;
|
||||
@@ -5708,7 +5319,6 @@ pub fn mustEscapeYAMLString(contents: []const u8) bool {
|
||||
else => true,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn pathContainsNodeModulesFolder(path: []const u8) bool {
|
||||
return strings.contains(path, comptime std.fs.path.sep_str ++ "node_modules" ++ std.fs.path.sep_str);
|
||||
}
|
||||
|
||||
353
test/bundler/bundler_comments.test.ts
Normal file
353
test/bundler/bundler_comments.test.ts
Normal file
@@ -0,0 +1,353 @@
|
||||
import { describe } from "bun:test";
|
||||
import { itBundled } from "./expectBundled";
|
||||
|
||||
describe("single-line comments", () => {
|
||||
itBundled("unix newlines", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\nconsole.log("hello");\n// Another comment\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
const output = api.readFile("/out.js");
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("windows newlines", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\r\nconsole.log("hello");\r\n// Another comment\r\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("no trailing newline", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\nconsole.log("hello");\n// No newline at end`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("non-ascii characters", {
|
||||
files: {
|
||||
"/entry.js": `// 你好,世界\n// Привет, мир\n// こんにちは世界\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("emoji", {
|
||||
files: {
|
||||
"/entry.js": `// 🚀 🔥 💯\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("invalid surrogate pair at beginning", {
|
||||
files: {
|
||||
"/entry.js": `// \uDC00 invalid surrogate\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("invalid surrogate pair at end", {
|
||||
files: {
|
||||
"/entry.js": `// invalid surrogate \uD800\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("invalid surrogate pair in middle", {
|
||||
files: {
|
||||
"/entry.js": `// invalid \uD800\uDC00\uD800 surrogate\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("multiple comments on same line", {
|
||||
files: {
|
||||
"/entry.js": `const x = 5; // first comment // second comment\nconsole.log(x);\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("console.log(x)");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("comment with ASI", {
|
||||
files: {
|
||||
"/entry.js": `const x = 5// first comment // second comment\nconsole.log(x)`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("console.log(x)");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("comment at end of file without newline", {
|
||||
files: {
|
||||
"/entry.js": `console.log("hello"); //`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("empty comments", {
|
||||
files: {
|
||||
"/entry.js": `//\n//\nconsole.log("hello");\n//`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("comments with special characters", {
|
||||
files: {
|
||||
"/entry.js": `// Comment with \\ backslash\n// Comment with \" quote\n// Comment with \t tab\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("comments with control characters", {
|
||||
files: {
|
||||
"/entry.js": `// Comment with \u0000 NULL\n// Comment with \u0001 SOH\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("comments with minification", {
|
||||
files: {
|
||||
"/entry.js": `// This should be removed\nconsole.log("hello");\n// This too`,
|
||||
},
|
||||
minifyWhitespace: true,
|
||||
minifySyntax: true,
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toEqualIgnoringWhitespace('console.log("hello");');
|
||||
},
|
||||
});
|
||||
|
||||
for (const minify of [true, false]) {
|
||||
itBundled(
|
||||
`some code and an empty comment without newline preceding ${minify ? "with minification" : "without minification"}`,
|
||||
{
|
||||
files: {
|
||||
"/entry.js": `console.log("hello");//`,
|
||||
},
|
||||
minifyWhitespace: minify,
|
||||
minifySyntax: minify,
|
||||
run: {
|
||||
stdout: "hello",
|
||||
},
|
||||
},
|
||||
);
|
||||
itBundled(`some code and then only an empty comment ${minify ? "with minification" : "without minification"}`, {
|
||||
files: {
|
||||
"/entry.js": `console.log("hello");\n//`,
|
||||
},
|
||||
minifyWhitespace: minify,
|
||||
minifySyntax: minify,
|
||||
run: {
|
||||
stdout: "hello",
|
||||
},
|
||||
});
|
||||
itBundled(`only an empty comment ${minify ? "with minification" : "without minification"}`, {
|
||||
files: {
|
||||
"/entry.js": `//`,
|
||||
},
|
||||
minifyWhitespace: minify,
|
||||
minifySyntax: minify,
|
||||
run: {
|
||||
stdout: "",
|
||||
},
|
||||
});
|
||||
itBundled("only a comment", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment`,
|
||||
},
|
||||
minifyWhitespace: true,
|
||||
minifySyntax: true,
|
||||
run: {
|
||||
stdout: "",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
itBundled("trailing //# sourceMappingURL=", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\nconsole.log("hello");\n//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("trailing //# sourceMappingURL= with == at end", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\nconsole.log("hello");\n//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9==`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("trailing //# sourceMappingURL= with = at end", {
|
||||
files: {
|
||||
"/entry.js": `// This is a comment\nconsole.log("hello");\n//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9=`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("leading //# sourceMappingURL= with = at end", {
|
||||
files: {
|
||||
"/entry.js": `//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9=\n// This is a comment\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("leading trailing newline //# sourceMappingURL= with = at end", {
|
||||
files: {
|
||||
"/entry.js": `//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9=\n// This is a comment\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("leading newline and sourcemap, trailing newline //# sourceMappingURL= with = at end", {
|
||||
files: {
|
||||
"/entry.js": `\n//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiZXhhbXBsZS5qcyIsInNvdXJjZSI6Ii8vZXhhbXBsZS5qcyJ9=\n// This is a comment\nconsole.log("hello");\n`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment basic", {
|
||||
files: {
|
||||
"/entry.js": `//#__PURE__\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with spaces", {
|
||||
files: {
|
||||
"/entry.js": `// #__PURE__ \nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with text before", {
|
||||
files: {
|
||||
"/entry.js": `// some text #__PURE__\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with text after", {
|
||||
files: {
|
||||
"/entry.js": `// #__PURE__ some text\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with unicode characters", {
|
||||
files: {
|
||||
"/entry.js": `// 你好 #__PURE__ 世界\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with emoji", {
|
||||
files: {
|
||||
"/entry.js": `// 🚀 #__PURE__ 🔥\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with invalid surrogate pair", {
|
||||
files: {
|
||||
"/entry.js": `// \uD800 #__PURE__ \uDC00\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("multiple __PURE__ comments in single-line comments", {
|
||||
files: {
|
||||
"/entry.js": `//#__PURE__\nconsole.log("hello");\n//#__PURE__\nconsole.log("world");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
api.expectFile("/out.js").not.toContain("world");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with minification", {
|
||||
files: {
|
||||
"/entry.js": `//#__PURE__\nconsole.log("hello");`,
|
||||
},
|
||||
minifyWhitespace: true,
|
||||
minifySyntax: true,
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment with windows newlines", {
|
||||
files: {
|
||||
"/entry.js": `//#__PURE__\r\nconsole.log("hello");`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").not.toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment at end of file", {
|
||||
files: {
|
||||
"/entry.js": `console.log("hello");\n//#__PURE__`,
|
||||
},
|
||||
onAfterBundle(api) {
|
||||
api.expectFile("/out.js").toContain("hello");
|
||||
},
|
||||
});
|
||||
|
||||
itBundled("__PURE__ comment in single-line comment in middle of a statement", {
|
||||
files: {
|
||||
"/entry.js": `console.log(//#__PURE__\n123);`,
|
||||
},
|
||||
run: {
|
||||
stdout: "123",
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -204,7 +204,6 @@ describe("bundler", () => {
|
||||
`,
|
||||
});
|
||||
itBundledDevAndProd("jsx/Classic", {
|
||||
todo: true,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
import { print } from 'bun-test-helpers'
|
||||
@@ -226,7 +225,6 @@ describe("bundler", () => {
|
||||
},
|
||||
});
|
||||
itBundledDevAndProd("jsx/ClassicPragma", {
|
||||
todo: true,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
// @jsx fn
|
||||
@@ -298,7 +296,6 @@ describe("bundler", () => {
|
||||
`,
|
||||
});
|
||||
itBundledDevAndProd("jsx/Factory", {
|
||||
todo: true,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
const h = () => 'hello'
|
||||
@@ -322,7 +319,6 @@ describe("bundler", () => {
|
||||
},
|
||||
});
|
||||
itBundledDevAndProd("jsx/FactoryImport", {
|
||||
todo: false,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
import { h, fragment } from './jsx.ts';
|
||||
@@ -353,7 +349,6 @@ describe("bundler", () => {
|
||||
},
|
||||
});
|
||||
itBundledDevAndProd("jsx/FactoryImportExplicitReactDefault", {
|
||||
todo: false,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
import { print } from 'bun-test-helpers'
|
||||
@@ -374,7 +369,6 @@ describe("bundler", () => {
|
||||
},
|
||||
});
|
||||
itBundledDevAndProd("jsx/FactoryImportExplicitReactDefaultExternal", {
|
||||
todo: false,
|
||||
files: {
|
||||
"/index.jsx": /* js*/ `
|
||||
import { print } from 'bun-test-helpers'
|
||||
@@ -397,4 +391,24 @@ describe("bundler", () => {
|
||||
expect(file).toContain('import * as React from "react"');
|
||||
},
|
||||
});
|
||||
itBundled("jsx/jsxImportSource pragma works", {
|
||||
files: {
|
||||
"/index.jsx": /* jsx */ `
|
||||
// @jsxImportSource hello
|
||||
console.log(<div>Hello World</div>);
|
||||
`,
|
||||
"/node_modules/hello/jsx-dev-runtime.js": /* js */ `
|
||||
export function jsxDEV(type, props, key) {
|
||||
return {
|
||||
$$typeof: Symbol("hello_jsxDEV"), type, props, key
|
||||
}
|
||||
}
|
||||
`,
|
||||
},
|
||||
outdir: "/out",
|
||||
target: "browser",
|
||||
run: {
|
||||
stdout: `{\n $$typeof: Symbol(hello_jsxDEV),\n type: \"div\",\n props: {\n children: \"Hello World\",\n },\n key: undefined,\n}`,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,7 +12,7 @@ const words: Record<string, { reason: string; limit?: number; regex?: boolean }>
|
||||
"std.debug.assert": { reason: "Use bun.assert instead", limit: 26 },
|
||||
"std.debug.dumpStackTrace": { reason: "Use bun.handleErrorReturnTrace or bun.crash_handler.dumpStackTrace instead" },
|
||||
"std.debug.print": { reason: "Don't let this be committed", limit: 0 },
|
||||
"std.mem.indexOfAny(u8": { reason: "Use bun.strings.indexOfAny", limit: 3 },
|
||||
"std.mem.indexOfAny(u8": { reason: "Use bun.strings.indexOfAny", limit: 2 },
|
||||
"std.StringArrayHashMapUnmanaged(": { reason: "bun.StringArrayHashMapUnmanaged has a faster `eql`", limit: 12 },
|
||||
"std.StringArrayHashMap(": { reason: "bun.StringArrayHashMap has a faster `eql`", limit: 1 },
|
||||
"std.StringHashMapUnmanaged(": { reason: "bun.StringHashMapUnmanaged has a faster `eql`" },
|
||||
|
||||
Reference in New Issue
Block a user