mirror of
https://github.com/oven-sh/bun
synced 2026-02-15 13:22:07 +00:00
Fix unicode imports, unicode-escaped variable names, and printClauseAlias not working for utf-8 (#15009)
This commit is contained in:
@@ -2152,6 +2152,20 @@ pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16:
|
||||
return list;
|
||||
}
|
||||
|
||||
/// Converts `utf16` to UTF-8 into the backing buffer of `list_` and returns the updated list.
///
/// The conversion writes into the list's full capacity starting at offset 0, so any items
/// already in the list are overwritten rather than appended to. On success the returned
/// list's length is the count reported by simdutf.
///
/// Returns `error.SurrogatePair` when simdutf reports a `.surrogate` status. The list is not
/// freed on that path; the caller still holds its own copy and remains responsible for it.
///
/// This function never grows the list.
/// NOTE(review): presumably the caller must reserve enough capacity for the whole output — confirm.
pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
    var output = list_;

    // Expose the entire allocation (not just `items`) as the destination buffer.
    const destination = output.items.ptr[0..output.capacity];
    const conversion = bun.simdutf.convert.utf16.to.utf8.with_errors.le(utf16, destination);

    if (conversion.status == .surrogate) return error.SurrogatePair;

    output.items.len = conversion.count;
    return output;
}
|
||||
|
||||
pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !void {
|
||||
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
|
||||
utf16,
|
||||
@@ -2167,6 +2181,20 @@ pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !v
|
||||
list.items.len += result.count;
|
||||
}
|
||||
|
||||
/// Allocates a buffer with `allocator`, converts `utf16` to UTF-8 into it, and returns the
/// resulting bytes (the `items` slice of the internal list).
///
/// Two paths:
/// - When `bun.FeatureFlags.use_simdutf` is set and `Type` is `[]const u16`, the buffer is
///   sized from simdutf's UTF-8 length for the input plus 16 bytes of padding, and the
///   conversion is done by `convertUTF16ToUTF8`.
/// - Otherwise the buffer starts with a capacity of `utf16.len` bytes and the conversion is
///   done by `toUTF8ListWithType`.
///
/// Errors from the allocation or from either conversion helper are propagated.
///
/// NOTE(review): despite the name, the simdutf path calls `convertUTF16ToUTF8` rather than
/// `convertUTF16ToUTF8WithoutInvalidSurrogatePairs` — confirm whether invalid surrogate
/// pairs are meant to be rejected here.
/// NOTE(review): the returned slice can be shorter than the allocated capacity — verify how
/// callers free it.
pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
        const utf8_len = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // add 16 bytes of padding for SIMDUTF
        const buffer = try std.ArrayList(u8).initCapacity(allocator, utf8_len + 16);
        const converted = try convertUTF16ToUTF8(buffer, Type, utf16);
        return converted.items;
    }

    const buffer = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
    const converted = try toUTF8ListWithType(buffer, Type, utf16);
    return converted.items;
}
|
||||
|
||||
pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
|
||||
if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
|
||||
const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
|
||||
@@ -4230,21 +4258,30 @@ pub fn containsNewlineOrNonASCIIOrQuote(slice_: []const u8) bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn indexOfNeedsEscape(slice: []const u8) ?u32 {
|
||||
pub fn indexOfNeedsEscape(slice: []const u8, comptime quote_char: u8) ?u32 {
|
||||
var remaining = slice;
|
||||
if (remaining.len == 0)
|
||||
return null;
|
||||
|
||||
if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == '"') {
|
||||
if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == quote_char or (quote_char == '`' and remaining[0] == '$')) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
const cmp: AsciiVectorU1 = if (comptime quote_char == '`') ( //
|
||||
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '"')))));
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '$'))))) //
|
||||
) else ( //
|
||||
@as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) |
|
||||
@as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) //
|
||||
);
|
||||
|
||||
if (@reduce(.Max, cmp) > 0) {
|
||||
const bitmask = @as(AsciiVectorInt, @bitCast(cmp));
|
||||
@@ -4259,7 +4296,7 @@ pub fn indexOfNeedsEscape(slice: []const u8) ?u32 {
|
||||
|
||||
for (remaining) |*char_| {
|
||||
const char = char_.*;
|
||||
if (char > 127 or char < 0x20 or char == '\\' or char == '"') {
|
||||
if (char > 127 or char < 0x20 or char == '\\' or char == quote_char or (quote_char == '`' and char == '$')) {
|
||||
return @as(u32, @truncate(@intFromPtr(char_) - @intFromPtr(slice.ptr)));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user