Fix unicode imports, unicode-escaped variable names, and printClauseAlias not working for utf-8 (#15009)

This commit is contained in:
pfg
2024-11-11 13:27:42 -08:00
committed by GitHub
parent 62cabe9003
commit 56f7c8887b
28 changed files with 646 additions and 995 deletions

View File

@@ -2152,6 +2152,20 @@ pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16:
return list;
}
pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
var list = list_;
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
utf16,
list.items.ptr[0..list.capacity],
);
if (result.status == .surrogate) {
return error.SurrogatePair;
}
list.items.len = result.count;
return list;
}
pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !void {
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
utf16,
@@ -2167,6 +2181,20 @@ pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !v
list.items.len += result.count;
}
pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
// add 16 bytes of padding for SIMDUTF
var list = try std.ArrayList(u8).initCapacity(allocator, length + 16);
list = try convertUTF16ToUTF8(list, Type, utf16);
return list.items;
}
var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
list = try toUTF8ListWithType(list, Type, utf16);
return list.items;
}
pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
@@ -4230,21 +4258,30 @@ pub fn containsNewlineOrNonASCIIOrQuote(slice_: []const u8) bool {
return false;
}
pub fn indexOfNeedsEscape(slice: []const u8) ?u32 {
pub fn indexOfNeedsEscape(slice: []const u8, comptime quote_char: u8) ?u32 {
var remaining = slice;
if (remaining.len == 0)
return null;
if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == '"') {
if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == quote_char or (quote_char == '`' and remaining[0] == '$')) {
return 0;
}
if (comptime Environment.enableSIMD) {
while (remaining.len >= ascii_vector_size) {
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
const cmp: AsciiVectorU1 = if (comptime quote_char == '`') ( //
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '"')))));
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) |
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '$'))))) //
) else ( //
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) //
);
if (@reduce(.Max, cmp) > 0) {
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
@@ -4259,7 +4296,7 @@ pub fn indexOfNeedsEscape(slice: []const u8) ?u32 {
for (remaining) |*char_| {
const char = char_.*;
if (char > 127 or char < 0x20 or char == '\\' or char == '"') {
if (char > 127 or char < 0x20 or char == '\\' or char == quote_char or (quote_char == '`' and char == '$')) {
return @as(u32, @truncate(@intFromPtr(char_) - @intFromPtr(slice.ptr)));
}
}