mirror of
https://github.com/oven-sh/bun
synced 2026-02-10 19:08:50 +00:00
Split up string_immutable into more files (#20446)
Co-authored-by: Jarred-Sumner <709451+Jarred-Sumner@users.noreply.github.com>
This commit is contained in:
640
src/string/escapeHTML.zig
Normal file
640
src/string/escapeHTML.zig
Normal file
@@ -0,0 +1,640 @@
|
||||
/// Escapes `"`, `&`, `'`, `<` and `>` in a Latin-1 string so it can be embedded in HTML.
///
/// The returned union says where the escaped text lives:
/// - `.original`: nothing needed escaping; no allocation was made and the
///   caller should keep using `latin1`.
/// - `.static`: the input was empty or a single escapable byte; the payload
///   is a constant string.
/// - `.allocated`: the escaped text, allocated with `allocator`.
///
/// Allocation failures are returned as errors; a partially built output
/// buffer is released before the error propagates.
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) !Escaped(u8) {
    const Helpers = struct {
        /// Every byte that has an entity in `entity`.
        const escaped_chars = "\"&'<>";

        /// Replacement text for `char`, or null when the byte is emitted verbatim.
        fn entity(char: u8) ?[]const u8 {
            return switch (char) {
                '"' => "&quot;",
                '&' => "&amp;",
                '\'' => "&#x27;", // modified from escape-html; used to be '&#39;'
                '<' => "&lt;",
                '>' => "&gt;",
                else => null,
            };
        }

        /// Number of output bytes produced for each possible input byte.
        pub const lengths: [std.math.maxInt(u8) + 1]u4 = brk: {
            var values = [_]u4{1} ** (std.math.maxInt(u8) + 1);
            for (escaped_chars) |c| {
                values[c] = @intCast(entity(c).?.len);
            }
            break :brk values;
        };

        /// One splatted vector per escapable byte, in `escaped_chars` order.
        const splats: [escaped_chars.len]AsciiVector = brk: {
            var out: [escaped_chars.len]AsciiVector = undefined;
            for (escaped_chars, 0..) |c, i| {
                out[i] = @splat(c);
            }
            break :brk out;
        };

        /// True when at least one lane of `vec` holds an escapable byte.
        fn needsEscape(vec: AsciiVector) callconv(bun.callconv_inline) bool {
            return @reduce(.Max, @as(AsciiVectorU1, @bitCast((vec == splats[0]))) |
                @as(AsciiVectorU1, @bitCast((vec == splats[1]))) |
                @as(AsciiVectorU1, @bitCast((vec == splats[2]))) |
                @as(AsciiVectorU1, @bitCast((vec == splats[3]))) |
                @as(AsciiVectorU1, @bitCast((vec == splats[4])))) == 1;
        }

        /// Copies the comptime-known `str` to `buf` and returns its length.
        fn appendString(buf: [*]u8, comptime str: []const u8) callconv(bun.callconv_inline) usize {
            buf[0..str.len].* = str[0..str.len].*;
            return str.len;
        }

        /// Writes the escaped form of `char` at `buf` and returns the number
        /// of bytes written. `buf` must have room for `lengths[char]` bytes.
        pub fn append(buf: [*]u8, char: u8) callconv(bun.callconv_inline) usize {
            return switch (char) {
                inline '"', '&', '\'', '<', '>' => |c| appendString(buf, comptime entity(c).?),
                else => blk: {
                    buf[0] = char;
                    break :blk 1;
                },
            };
        }

        /// Escapes a fixed-size input: sums the output size first, so that
        /// either nothing is allocated (`.original`) or exactly one
        /// right-sized buffer is.
        pub fn push(comptime len: anytype, chars_: *const [len]u8, allo: std.mem.Allocator) callconv(bun.callconv_inline) !Escaped(u8) {
            const chars = chars_.*;

            var total: usize = 0;
            inline for (0..len) |i| {
                total += lengths[chars[i]];
            }

            // Every byte maps to itself: nothing to escape.
            if (total == len) {
                return Escaped(u8){ .original = {} };
            }

            const output = try allo.alloc(u8, total);
            var head = output.ptr;
            inline for (0..len) |i| {
                head += append(head, chars[i]);
            }

            return Escaped(u8){ .allocated = output };
        }

        /// Appends all lanes of `vec` to `buf`, replacing escapable bytes
        /// with their entities.
        fn appendEscapedVector(buf: *std.ArrayList(u8), vec: AsciiVector) !void {
            // Room for the whole vector plus one entity (the longest is 6 bytes).
            try buf.ensureUnusedCapacity(ascii_vector_size + 6);
            inline for (0..ascii_vector_size) |i| {
                switch (vec[i]) {
                    inline '"', '&', '\'', '<', '>' => |c| {
                        const replacement = comptime entity(c).?;
                        // Reserve this entity plus the lanes still to come, so
                        // the verbatim lanes below can append without checks.
                        try buf.ensureUnusedCapacity((ascii_vector_size - i) + replacement.len);
                        buf.appendSliceAssumeCapacity(replacement);
                    },
                    else => |c| buf.appendAssumeCapacity(c),
                }
            }
        }
    };

    @setEvalBranchQuota(5000);
    switch (latin1.len) {
        0 => return Escaped(u8){ .static = "" },
        1 => return if (Helpers.entity(latin1[0])) |replacement|
            Escaped(u8){ .static = replacement }
        else
            Escaped(u8){ .original = {} },
        2 => {
            const first: []const u8 = Helpers.entity(latin1[0]) orelse latin1[0..1];
            const second: []const u8 = Helpers.entity(latin1[1]) orelse latin1[1..2];
            // Entities are longer than one byte, so length 1 means "unchanged".
            if (first.len == 1 and second.len == 1) {
                return Escaped(u8){ .original = {} };
            }

            return Escaped(u8){ .allocated = try strings.append(allocator, first, second) };
        },

        // The simd implementation is slower for inputs less than 32 bytes.
        inline 3...32 => |len| return Helpers.push(len, latin1[0..len], allocator),

        else => {
            // Always a suffix of `latin1`: the part not yet scanned or copied.
            var remaining = latin1;

            var any_needs_escape = false;
            var buf: std.ArrayList(u8) = std.ArrayList(u8){
                .items = &.{},
                .capacity = 0,
                .allocator = allocator,
            };
            errdefer buf.deinit();

            if (comptime Environment.enableSIMD) {
                // pass #1: scan for any characters that need escaping
                // assume most strings won't need any escaping, so don't actually allocate the buffer
                while (remaining.len >= ascii_vector_size) {
                    const vec: AsciiVector = remaining[0..ascii_vector_size].*;
                    if (Helpers.needsEscape(vec)) {
                        buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
                        // Everything before this vector was clean; copy it verbatim.
                        buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len]);
                        any_needs_escape = true;
                        try Helpers.appendEscapedVector(&buf, vec);
                        remaining = remaining[ascii_vector_size..];
                        break;
                    }

                    remaining = remaining[ascii_vector_size..];
                }

                // pass #2: we found something that needed an escape,
                // so every remaining vector is copied (escaped or verbatim) into the buffer
                if (any_needs_escape) {
                    while (remaining.len >= ascii_vector_size) {
                        const vec: AsciiVector = remaining[0..ascii_vector_size].*;
                        if (Helpers.needsEscape(vec)) {
                            try Helpers.appendEscapedVector(&buf, vec);
                        } else {
                            try buf.appendSlice(remaining[0..ascii_vector_size]);
                        }
                        remaining = remaining[ascii_vector_size..];
                    }
                }
            }

            // Scalar tail: fewer than `ascii_vector_size` bytes (or everything, without SIMD).
            if (!any_needs_escape) {
                const first = std.mem.indexOfAny(u8, remaining, Helpers.escaped_chars) orelse
                    return Escaped(u8){ .original = {} };

                buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + @as(usize, Helpers.lengths[remaining[first]]));
                buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len + first]);
                remaining = remaining[first..];
            }

            for (remaining) |c| {
                if (Helpers.entity(c)) |replacement| {
                    try buf.appendSlice(replacement);
                } else {
                    try buf.append(c);
                }
            }

            return Escaped(u8){ .allocated = try buf.toOwnedSlice() };
        },
    }
}
|
||||
|
||||
/// Result type of the HTML escaping functions in this file, parameterized on
/// the code unit type `T` of the input.
fn Escaped(comptime T: type) type {
    return union(enum) {
        /// Constant replacement text. Always 8-bit, regardless of `T`.
        static: []const u8,
        /// The input needs no escaping; no output was produced.
        original: void,
        /// Escaped output in a freshly allocated buffer.
        allocated: []T,
    };
}
|
||||
|
||||
/// Escapes `"`, `&`, `'`, `<` and `>` in a UTF-16 string so it can be embedded in HTML.
///
/// The returned union says where the escaped text lives:
/// - `.original`: nothing needed escaping; no allocation was made and the
///   caller should keep using `utf16`.
/// - `.static`: the input was empty or a single escapable unit; the payload
///   is a constant 8-bit string (not UTF-16).
/// - `.allocated`: the escaped UTF-16 text, allocated with `allocator`.
///
/// Units >= 128 are copied through unchanged, advancing by the length that
/// `utf16Codepoint` reports (presumably so a surrogate pair stays together).
///
/// Allocation failures are returned as errors; a partially built output
/// buffer is released before the error propagates.
pub fn escapeHTMLForUTF16Input(allocator: std.mem.Allocator, utf16: []const u16) !Escaped(u16) {
    const Helpers = struct {
        /// Every code unit that has an entity in `entity`.
        const escaped_chars = "\"&'<>";

        /// UTF-16 replacement text for `unit`, or null when it is emitted verbatim.
        fn entity(unit: u16) ?[]const u16 {
            return switch (unit) {
                '"' => toUTF16Literal("&quot;"),
                '&' => toUTF16Literal("&amp;"),
                '\'' => toUTF16Literal("&#x27;"), // modified from escape-html; used to be '&#39;'
                '<' => toUTF16Literal("&lt;"),
                '>' => toUTF16Literal("&gt;"),
                else => null,
            };
        }

        /// One splatted vector per escapable unit, in `escaped_chars` order.
        const splats: [escaped_chars.len]AsciiU16Vector = brk: {
            var out: [escaped_chars.len]AsciiU16Vector = undefined;
            for (escaped_chars, 0..) |c, i| {
                out[i] = @splat(@as(u16, c));
            }
            break :brk out;
        };

        /// True when `vec` holds a unit above 127 or an escapable unit, i.e.
        /// when it cannot simply be copied as one block.
        fn needsScalarPass(vec: AsciiU16Vector) callconv(bun.callconv_inline) bool {
            return @reduce(.Max, @as(AsciiVectorU16U1, @bitCast(vec > @as(AsciiU16Vector, @splat(@as(u16, 127))))) |
                @as(AsciiVectorU16U1, @bitCast((vec == splats[0]))) |
                @as(AsciiVectorU16U1, @bitCast((vec == splats[1]))) |
                @as(AsciiVectorU16U1, @bitCast((vec == splats[2]))) |
                @as(AsciiVectorU16U1, @bitCast((vec == splats[3]))) |
                @as(AsciiVectorU16U1, @bitCast((vec == splats[4])))) == 1;
        }

        const Scan = struct {
            /// Index of the escapable unit when `found`, otherwise where scanning stopped.
            index: usize,
            found: bool,
        };

        /// Walks `units` from the start until an escapable unit is found or
        /// the index reaches `limit`. The final index may exceed `limit` by
        /// one when the last step covered two units.
        fn findEscapable(units: []const u16, limit: usize) Scan {
            var i: usize = 0;
            while (i < limit) {
                switch (units[i]) {
                    '"', '&', '\'', '<', '>' => return .{ .index = i, .found = true },
                    128...std.math.maxInt(u16) => i += utf16Codepoint([]const u16, units[i..]).len,
                    else => i += 1,
                }
            }
            return .{ .index = i, .found = false };
        }

        /// Appends `units[start..]` to `buf` with escaping applied, stopping
        /// once the index reaches `limit`. Returns the index reached, which
        /// may exceed `limit` by one (see `findEscapable`).
        fn appendEscaped(buf: *std.ArrayList(u16), units: []const u16, start: usize, limit: usize) !usize {
            var i = start;
            while (i < limit) {
                const unit = units[i];
                if (entity(unit)) |replacement| {
                    try buf.appendSlice(replacement);
                    i += 1;
                } else if (unit >= 128) {
                    const width: usize = utf16Codepoint([]const u16, units[i..]).len;
                    try buf.appendSlice(units[i..][0..width]);
                    i += width;
                } else {
                    try buf.append(unit);
                    i += 1;
                }
            }
            return i;
        }
    };

    switch (utf16.len) {
        0 => return Escaped(u16){ .static = &[_]u8{} },
        1 => return switch (utf16[0]) {
            '"' => Escaped(u16){ .static = "&quot;" },
            '&' => Escaped(u16){ .static = "&amp;" },
            '\'' => Escaped(u16){ .static = "&#x27;" },
            '<' => Escaped(u16){ .static = "&lt;" },
            '>' => Escaped(u16){ .static = "&gt;" },
            else => Escaped(u16){ .original = {} },
        },
        2 => {
            const first = Helpers.entity(utf16[0]);
            const second = Helpers.entity(utf16[1]);
            if (first == null and second == null) {
                return Escaped(u16){ .original = {} };
            }

            const head: []const u16 = first orelse utf16[0..1];
            const tail: []const u16 = second orelse utf16[1..2];

            const out = try allocator.alloc(u16, head.len + tail.len);
            @memcpy(out[0..head.len], head);
            @memcpy(out[head.len..], tail);
            return Escaped(u16){ .allocated = out };
        },

        else => {
            // Always a suffix of `utf16`: the part not yet scanned or copied.
            var remaining = utf16;

            var any_needs_escape = false;
            var buf: std.ArrayList(u16) = std.ArrayList(u16){
                .items = &.{},
                .capacity = 0,
                .allocator = allocator,
            };
            errdefer buf.deinit();

            if (comptime Environment.enableSIMD) {
                // pass #1: scan for any characters that need escaping
                // assume most strings won't need any escaping, so don't actually allocate the buffer
                while (remaining.len >= ascii_u16_vector_size) {
                    const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
                    if (!Helpers.needsScalarPass(vec)) {
                        remaining = remaining[ascii_u16_vector_size..];
                        continue;
                    }

                    const hit = Helpers.findEscapable(remaining, ascii_u16_vector_size);
                    if (!hit.found) {
                        // Only units above 127 here; skip past them.
                        remaining = remaining[hit.index..];
                        continue;
                    }

                    buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + 6);
                    // Everything before the hit was clean; copy it verbatim.
                    buf.appendSliceAssumeCapacity(utf16[0 .. utf16.len - remaining.len + hit.index]);
                    any_needs_escape = true;

                    // Finish this chunk; the index can end one past the chunk size.
                    const consumed = try Helpers.appendEscaped(&buf, remaining, hit.index, ascii_u16_vector_size);
                    remaining = remaining[consumed..];
                    break;
                }

                // pass #2: we found something that needed an escape,
                // so every remaining chunk is copied (escaped or verbatim) into the buffer
                if (any_needs_escape) {
                    while (remaining.len >= ascii_u16_vector_size) {
                        const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
                        if (Helpers.needsScalarPass(vec)) {
                            try buf.ensureUnusedCapacity(ascii_u16_vector_size);
                            const consumed = try Helpers.appendEscaped(&buf, remaining, 0, ascii_u16_vector_size);
                            remaining = remaining[consumed..];
                        } else {
                            try buf.appendSlice(remaining[0..ascii_u16_vector_size]);
                            remaining = remaining[ascii_u16_vector_size..];
                        }
                    }
                }
            }

            // Scalar tail: fewer than `ascii_u16_vector_size` units (or everything, without SIMD).
            if (!any_needs_escape) {
                const hit = Helpers.findEscapable(remaining, remaining.len);
                if (!hit.found) {
                    return Escaped(u16){ .original = {} };
                }

                buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + Helpers.entity(remaining[hit.index]).?.len);
                buf.appendSliceAssumeCapacity(utf16[0 .. utf16.len - remaining.len + hit.index]);
                remaining = remaining[hit.index..];
            }

            _ = try Helpers.appendEscaped(&buf, remaining, 0, remaining.len);

            return Escaped(u16){ .allocated = try buf.toOwnedSlice() };
        },
    }
}
|
||||
|
||||
const std = @import("std");
|
||||
const bun = @import("bun");
|
||||
const Environment = bun.Environment;
|
||||
const assert = bun.assert;
|
||||
const ascii_u16_vector_size = strings.ascii_u16_vector_size;
|
||||
const AsciiU16Vector = strings.AsciiU16Vector;
|
||||
const utf16Codepoint = strings.utf16Codepoint;
|
||||
const toUTF16Literal = strings.toUTF16Literal;
|
||||
const strings = bun.strings;
|
||||
const AsciiVectorU16U1 = strings.AsciiVectorU16U1;
|
||||
const AsciiVector = strings.AsciiVector;
|
||||
const ascii_vector_size = strings.ascii_vector_size;
|
||||
const AsciiVectorU1 = strings.AsciiVectorU1;
|
||||
Reference in New Issue
Block a user