mirror of
https://github.com/oven-sh/bun
synced 2026-02-12 20:09:04 +00:00
Web Streams API (#176)
* [bun.js] `WritableStream`, `ReadableStream`, `TransformStream`, `WritableStreamDefaultController`, `ReadableStreamDefaultController` & more * Implement `Blob.stream()` * Update streams.test.js * Fix sourcemaps crash * [TextEncoder] 3x faster in hot loops * reading almost works * start to implement native streams * Implement `Blob.stream()` * Implement `Bun.file(pathOrFd).stream()` * Add an extra function * [fs.readFile] Improve performance * make jsc bindings a little easier to work with * fix segfault * faster async/await + readablestream optimizations * WebKit updates * More WebKit updates * Add releaseWEakrefs binding * `bun:jsc` * More streams * Update streams.test.js * Update Makefile * Update mimalloc * Update WebKit * Create bun-jsc.test.js * Faster ReadableStream * Fix off by one & exceptions * Handle empty files/blobs * Update streams.test.js * Move streams to it's own file * temp * impl #1 * take two * good enough for now * Implement `readableStreamToArray`, `readableStreamToArrayBuffer`, `concatArrayBuffers` * jsxOptimizationInlining * Fix crash * Add `jsxOptimizationInline` to Bun.Transpiler * Update Transpiler types * Update js_ast.zig * Automatically choose production mode when NODE_ENV="production" * Update cli.zig * [jsx] Handle defaultProps when inlining * Update transpiler.test.js * uncomment some tests Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
This commit is contained in:
@@ -15,6 +15,10 @@ pub inline fn contains(self: string, str: string) bool {
|
||||
return std.mem.indexOf(u8, self, str) != null;
|
||||
}
|
||||
|
||||
/// Converts a comptime-known UTF-8 string into UTF-16 (little-endian) code units.
/// The conversion is forced to happen at compile time, so the returned slice
/// refers to constant data and nothing is allocated at runtime.
pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
    const utf16_le = comptime std.unicode.utf8ToUtf16LeStringLiteral(str);
    return utf16_le;
}
|
||||
|
||||
const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
|
||||
pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize {
|
||||
for (self) |c, i| {
|
||||
@@ -108,7 +112,7 @@ pub inline fn indexOf(self: string, str: string) ?usize {
|
||||
}
|
||||
|
||||
// --
|
||||
// This is faster when the string is found, by about 2x for a 4 MB file.
|
||||
// This is faster when the string is found, by about 2x for a 8 MB file.
|
||||
// It is slower when the string is NOT found
|
||||
// fn indexOfPosN(comptime T: type, buf: []const u8, start_index: usize, delimiter: []const u8, comptime n: comptime_int) ?usize {
|
||||
// const k = delimiter.len;
|
||||
@@ -324,7 +328,7 @@ test "eqlComptimeCheckLen" {
|
||||
}
|
||||
|
||||
test "eqlComptimeUTF16" {
|
||||
try std.testing.expectEqual(eqlComptimeUTF16(std.unicode.utf8ToUtf16LeStringLiteral("bun-darwin-aarch64.zip"), "bun-darwin-aarch64.zip"), true);
|
||||
try std.testing.expectEqual(eqlComptimeUTF16(toUTF16Literal("bun-darwin-aarch64.zip"), "bun-darwin-aarch64.zip"), true);
|
||||
const sizes = [_]u16{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23, 22, 24 };
|
||||
inline for (sizes) |size| {
|
||||
var buf: [size]u16 = undefined;
|
||||
@@ -542,7 +546,7 @@ pub fn eqlComptime(self: string, comptime alt: anytype) bool {
|
||||
}
|
||||
|
||||
/// Compares a runtime UTF-16 slice against a comptime-known UTF-8 string.
/// `alt` is converted to UTF-16 at compile time and the comparison is delegated
/// to `eqlComptimeCheckLenWithType` with its final argument set to `true`.
pub fn eqlComptimeUTF16(self: []const u16, comptime alt: []const u8) bool {
    // A single return: the previous body carried two consecutive `return`
    // statements, which leaves the second one unreachable.
    return eqlComptimeCheckLenWithType(u16, self, comptime toUTF16Literal(alt), true);
}
|
||||
|
||||
pub fn eqlComptimeIgnoreLen(self: string, comptime alt: anytype) bool {
|
||||
@@ -703,7 +707,7 @@ pub fn index(self: string, str: string) i32 {
|
||||
}
|
||||
|
||||
/// Returns true when `other` holds exactly the UTF-16 encoding of the
/// comptime-known UTF-8 string `self`.
pub fn eqlUtf16(comptime self: string, other: []const u16) bool {
    // A single return: the previous body carried two consecutive `return`
    // statements, which leaves the second one unreachable.
    return std.mem.eql(u16, toUTF16Literal(self), other);
}
|
||||
|
||||
pub fn toUTF8Alloc(allocator: std.mem.Allocator, js: []const u16) !string {
|
||||
@@ -974,7 +978,7 @@ pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, ut
|
||||
utf16_remaining = utf16_remaining[replacement.len..];
|
||||
|
||||
const count: usize = replacement.utf8Width();
|
||||
try list.ensureUnusedCapacity(i + count);
|
||||
try list.ensureTotalCapacityPrecise(i + count + list.items.len + @floatToInt(usize, (@intToFloat(f64, @truncate(u52, utf16_remaining.len)) * 1.2)));
|
||||
list.items.len += i;
|
||||
|
||||
copyU16IntoU8(
|
||||
@@ -992,12 +996,13 @@ pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, ut
|
||||
);
|
||||
}
|
||||
|
||||
try list.ensureUnusedCapacity(utf16_remaining.len);
|
||||
try list.ensureTotalCapacityPrecise(utf16_remaining.len + list.items.len);
|
||||
const old_len = list.items.len;
|
||||
list.items.len += utf16_remaining.len;
|
||||
copyU16IntoU8(list.items[old_len..], Type, utf16_remaining);
|
||||
|
||||
return list.toOwnedSlice();
|
||||
// don't call toOwnedSlice() because our
|
||||
return list.items;
|
||||
}
|
||||
|
||||
pub const EncodeIntoResult = struct {
|
||||
@@ -1005,6 +1010,12 @@ pub const EncodeIntoResult = struct {
|
||||
written: u32 = 0,
|
||||
};
|
||||
pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type, latin1_: Type) ![]u8 {
|
||||
if (comptime bun.FeatureFlags.latin1_is_now_ascii) {
|
||||
var out = try allocator.alloc(u8, latin1_.len);
|
||||
@memcpy(out.ptr, latin1_.ptr, latin1_.len);
|
||||
return out;
|
||||
}
|
||||
|
||||
var list = try std.ArrayList(u8).initCapacity(allocator, latin1_.len);
|
||||
var latin1 = latin1_;
|
||||
while (latin1.len > 0) {
|
||||
@@ -1029,6 +1040,56 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
|
||||
return list.toOwnedSlice();
|
||||
}
|
||||
|
||||
/// Converts Latin-1 text to UTF-8 and returns it as a JavaScript `Uint8Array` value.
///
/// - When the input contains no non-ASCII byte, an uninitialized `Uint8Array` of the
///   same length is created through `globalThis` and the bytes are copied into it.
/// - Otherwise the UTF-8 bytes are built in an `ArrayList` backed by `allocator`
///   and handed to `JSC.ArrayBuffer.fromBytes(...).toJS(globalThis, null)`.
///
/// Returns an error if any allocation fails.
pub fn allocateLatin1IntoUTF8ForArrayBuffer(allocator: std.mem.Allocator, globalThis: *JSC.JSGlobalObject, comptime Type: type, latin1_: Type) !JSC.JSValue {
    if (comptime bun.FeatureFlags.latin1_is_now_ascii) {
        var out = try allocator.alloc(u8, latin1_.len);
        @memcpy(out.ptr, latin1_.ptr, latin1_.len);
        // Wrap the bytes the same way the non-ASCII path below does; returning the
        // raw `[]u8` does not match this function's `JSC.JSValue` return type.
        return JSC.ArrayBuffer.fromBytes(out, .Uint8Array).toJS(globalThis, null);
    }

    var latin1 = latin1_;

    if (firstNonASCII(latin1)) |start_i| {
        // +2 leaves room for the two-byte encoding of the first non-ASCII byte.
        var list = try std.ArrayList(u8).initCapacity(allocator, latin1_.len + 2);
        errdefer list.deinit();

        // Copy the leading ASCII run verbatim, then step past it so that
        // `latin1[0]` is the non-ASCII byte that `firstNonASCII` reported.
        list.items.len = start_i;
        @memcpy(list.items.ptr, latin1.ptr, start_i);
        latin1 = latin1[start_i..];

        {
            var buf = list.items.ptr[list.items.len .. list.items.len + 2][0..2];
            list.items.len += 2;
            buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
            latin1 = latin1[1..];
        }

        while (latin1.len > 0) {
            // Length of the next ASCII run (the whole remainder if no non-ASCII byte is left).
            const read = @as(usize, firstNonASCII(latin1) orelse @intCast(u32, latin1.len));
            try list.ensureTotalCapacityPrecise(
                list.items.len + read + if (read != latin1.len) @as(usize, 2) else @as(usize, 0),
            );
            const before = list.items.len;
            list.items.len += read;
            @memcpy(list.items[before..].ptr, latin1.ptr, read);
            latin1 = latin1[read..];

            // Anything left starts with a non-ASCII byte: emit its two-byte encoding.
            if (latin1.len > 0) {
                try list.ensureUnusedCapacity(2);
                var buf = list.items.ptr[list.items.len .. list.items.len + 2][0..2];
                list.items.len += 2;
                buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
                latin1 = latin1[1..];
            }
        }

        return JSC.ArrayBuffer.fromBytes(list.toOwnedSlice(), .Uint8Array).toJS(globalThis, null);
    }

    {
        // Pure ASCII: Latin-1 and UTF-8 are byte-identical, so copy straight into a new array.
        const array_buffer = JSC.JSValue.createUninitializedUint8Array(globalThis, latin1.len);
        var bytes = array_buffer.asArrayBuffer(globalThis).?.slice();
        @memcpy(bytes.ptr, latin1.ptr, latin1.len);
        return array_buffer;
    }
}
|
||||
|
||||
pub const UTF16Replacement = struct {
|
||||
code_point: u32 = unicode_replacement,
|
||||
len: u3 = 0,
|
||||
@@ -1132,6 +1193,12 @@ pub fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement {
|
||||
}
|
||||
|
||||
pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) EncodeIntoResult {
|
||||
if (comptime bun.FeatureFlags.latin1_is_now_ascii) {
|
||||
const to_copy = @truncate(u32, @minimum(buf_.len, latin1_.len));
|
||||
@memcpy(buf_.ptr, latin1_.ptr, to_copy);
|
||||
return .{ .written = to_copy, .read = to_copy };
|
||||
}
|
||||
|
||||
var buf = buf_;
|
||||
var latin1 = latin1_;
|
||||
while (buf.len > 0 and latin1.len > 0) {
|
||||
@@ -1144,19 +1211,18 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
|
||||
break;
|
||||
}
|
||||
|
||||
buf[0..8].* = @bitCast([ascii_vector_size]u8, vec)[0..8].*;
|
||||
buf[8..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[8..ascii_vector_size].*;
|
||||
buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
|
||||
latin1 = latin1[ascii_vector_size..];
|
||||
buf = buf[ascii_vector_size..];
|
||||
}
|
||||
|
||||
while (read < latin1.len and latin1[read] < 0x80) : (read += 1) {}
|
||||
|
||||
const written = @minimum(read, buf.len);
|
||||
if (written == 0) break;
|
||||
@memcpy(buf.ptr, latin1.ptr, written);
|
||||
latin1 = latin1[written..];
|
||||
buf = buf[written..];
|
||||
const to_copy = @minimum(read, buf.len);
|
||||
@memcpy(buf.ptr, latin1.ptr, to_copy);
|
||||
latin1 = latin1[to_copy..];
|
||||
buf = buf[to_copy..];
|
||||
|
||||
if (latin1.len > 0 and buf.len >= 2) {
|
||||
buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
|
||||
latin1 = latin1[1..];
|
||||
@@ -1165,11 +1231,19 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
|
||||
}
|
||||
|
||||
return .{
|
||||
.read = @truncate(u32, buf_.len - buf.len),
|
||||
.written = @truncate(u32, latin1_.len - latin1.len),
|
||||
.written = @truncate(u32, buf_.len - buf.len),
|
||||
.read = @truncate(u32, latin1_.len - latin1.len),
|
||||
};
|
||||
}
|
||||
|
||||
/// Walks `buf_` and, for every non-ASCII byte found by `strings.firstNonASCII`,
/// writes the two bytes returned by `latin1ToCodepointBytesAssumeNotASCII` over
/// that byte and the byte immediately after it, then resumes scanning two bytes
/// further on.
///
/// NOTE(review): the two-byte store overwrites the byte following each non-ASCII
/// byte, and extends past the slice when the non-ASCII byte is the last one.
/// Presumably callers lay the buffer out to allow for this; confirm.
pub fn replaceLatin1WithUTF8(buf_: []u8) void {
    var rest = buf_;
    while (true) {
        const index = strings.firstNonASCII(rest) orelse return;
        const encoded = latin1ToCodepointBytesAssumeNotASCII(rest[index]);
        rest[index..][0..2].* = encoded;
        rest = rest[index + 2 ..];
    }
}
|
||||
|
||||
pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize {
|
||||
var latin1 = latin1_;
|
||||
var count: usize = 0;
|
||||
@@ -1245,6 +1319,625 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Escapes the five HTML-significant characters (`"`, `&`, `'`, `<`, `>`) in `latin1`.
///
/// Return value:
/// - the input slice itself when nothing needs escaping (no allocation);
/// - a static string literal for empty input or a single escaped character;
/// - otherwise a new buffer obtained from `allocator` (for two-byte input this
///   is whatever `strings.append` returns).
///
/// Returns an error when an allocation fails.
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
    const Scalar = struct {
        /// The characters that are replaced by an entity.
        const escape_chars = "\"&'<>";

        /// Output length for every possible byte value.
        /// Sized 256 so that byte 0xFF is a valid index.
        pub const lengths: [std.math.maxInt(u8) + 1]u4 = brk: {
            var values: [std.math.maxInt(u8) + 1]u4 = undefined;
            for (values) |_, i| {
                values[i] = switch (i) {
                    '"' => "&quot;".len,
                    '&' => "&amp;".len,
                    '\'' => "&#x27;".len,
                    '<' => "&lt;".len,
                    '>' => "&gt;".len,
                    else => 1,
                };
            }

            break :brk values;
        };

        /// One splatted vector per escape character, used for the SIMD scan.
        const escape_vectors: [escape_chars.len]AsciiVector = brk: {
            var vectors: [escape_chars.len]AsciiVector = undefined;
            for (escape_chars) |c, i| {
                vectors[i] = @splat(ascii_vector_size, c);
            }
            break :brk vectors;
        };

        /// Returns the entity that replaces `char`, or null when `char` is emitted as-is.
        inline fn entityFor(char: u8) ?[]const u8 {
            return switch (char) {
                '"' => "&quot;",
                '&' => "&amp;",
                '\'' => "&#x27;", // modified from escape-html; used to be '&#39;'
                '<' => "&lt;",
                '>' => "&gt;",
                else => null,
            };
        }

        /// True when any lane of `vec` holds one of the escape characters.
        inline fn vectorNeedsEscape(vec: AsciiVector) bool {
            return @reduce(.Max, @bitCast(AsciiVectorU1, (vec == escape_vectors[0])) |
                @bitCast(AsciiVectorU1, (vec == escape_vectors[1])) |
                @bitCast(AsciiVectorU1, (vec == escape_vectors[2])) |
                @bitCast(AsciiVectorU1, (vec == escape_vectors[3])) |
                @bitCast(AsciiVectorU1, (vec == escape_vectors[4]))) == 1;
        }

        /// Writes `char` (escaped if necessary) at `buf` and returns the number of bytes written.
        pub inline fn append(buf: [*]u8, char: u8) usize {
            if (entityFor(char)) |entity| {
                @memcpy(buf, entity.ptr, entity.len);
                return entity.len;
            }

            buf[0] = char;
            return 1;
        }

        /// Appends one chunk to `buf`, escaping as it goes.
        fn appendChunk(buf: *std.ArrayList(u8), chunk: []const u8) !void {
            try buf.ensureUnusedCapacity(chunk.len + 6);
            for (chunk) |c, i| {
                if (entityFor(c)) |entity| {
                    // Reserve room for this entity plus every byte still to come in the
                    // chunk, so the plain-byte branch below never has to grow the list.
                    try buf.ensureUnusedCapacity((chunk.len - i) + entity.len);
                    buf.appendSliceAssumeCapacity(entity);
                } else {
                    buf.appendAssumeCapacity(c);
                }
            }
        }

        /// Fixed-length scalar path: returns `chars_` untouched when nothing needs
        /// escaping, otherwise an exactly-sized buffer allocated from `allo`.
        pub inline fn push(comptime len: anytype, chars_: *const [len]u8, allo: std.mem.Allocator) ![]const u8 {
            const chars = chars_.*;
            var total: usize = 0;

            inline for (comptime bun.range(0, len)) |i| {
                total += lengths[chars[i]];
            }

            if (total == len) {
                return @as([]const u8, chars_);
            }

            var output = try allo.alloc(u8, total);
            var head = output.ptr;
            inline for (comptime bun.range(0, len)) |i| {
                head += @This().append(head, chars[i]);
            }

            return output;
        }
    };

    switch (latin1.len) {
        0 => return "",
        1 => return Scalar.entityFor(latin1[0]) orelse latin1,
        2 => {
            const first: []const u8 = Scalar.entityFor(latin1[0]) orelse @as([]const u8, latin1[0..1]);
            const second: []const u8 = Scalar.entityFor(latin1[1]) orelse @as([]const u8, latin1[1..2]);
            if (first.len == 1 and second.len == 1) {
                return latin1;
            }

            return strings.append(allocator, first, second);
        },

        // The simd implementation is slower for inputs less than 32 bytes.
        3 => return Scalar.push(3, latin1[0..3], allocator),
        4 => return Scalar.push(4, latin1[0..4], allocator),
        5 => return Scalar.push(5, latin1[0..5], allocator),
        6 => return Scalar.push(6, latin1[0..6], allocator),
        7 => return Scalar.push(7, latin1[0..7], allocator),
        8 => return Scalar.push(8, latin1[0..8], allocator),
        9 => return Scalar.push(9, latin1[0..9], allocator),
        10 => return Scalar.push(10, latin1[0..10], allocator),
        11 => return Scalar.push(11, latin1[0..11], allocator),
        12 => return Scalar.push(12, latin1[0..12], allocator),
        13 => return Scalar.push(13, latin1[0..13], allocator),
        14 => return Scalar.push(14, latin1[0..14], allocator),
        15 => return Scalar.push(15, latin1[0..15], allocator),
        16 => return Scalar.push(16, latin1[0..16], allocator),
        17 => return Scalar.push(17, latin1[0..17], allocator),
        18 => return Scalar.push(18, latin1[0..18], allocator),
        19 => return Scalar.push(19, latin1[0..19], allocator),
        20 => return Scalar.push(20, latin1[0..20], allocator),
        21 => return Scalar.push(21, latin1[0..21], allocator),
        22 => return Scalar.push(22, latin1[0..22], allocator),
        23 => return Scalar.push(23, latin1[0..23], allocator),
        24 => return Scalar.push(24, latin1[0..24], allocator),
        25 => return Scalar.push(25, latin1[0..25], allocator),
        26 => return Scalar.push(26, latin1[0..26], allocator),
        27 => return Scalar.push(27, latin1[0..27], allocator),
        28 => return Scalar.push(28, latin1[0..28], allocator),
        29 => return Scalar.push(29, latin1[0..29], allocator),
        30 => return Scalar.push(30, latin1[0..30], allocator),
        31 => return Scalar.push(31, latin1[0..31], allocator),
        32 => return Scalar.push(32, latin1[0..32], allocator),

        else => {
            // `remaining` is always a suffix of `latin1`, so the number of bytes
            // already consumed is `latin1.len - remaining.len`.
            var remaining = latin1;

            var any_needs_escape = false;
            // Only valid once `any_needs_escape` is true.
            var buf: std.ArrayList(u8) = undefined;
            errdefer if (any_needs_escape) buf.deinit();

            if (comptime Environment.isAarch64 or Environment.isX64) {
                // pass #1: scan for any characters that need escaping
                // assume most strings won't need any escaping, so don't actually allocate the buffer
                while (!any_needs_escape and remaining.len >= ascii_vector_size) {
                    const chunk = remaining[0..ascii_vector_size];
                    if (Scalar.vectorNeedsEscape(chunk.*)) {
                        const prefix_len = latin1.len - remaining.len;
                        buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
                        buf.appendSliceAssumeCapacity(latin1[0..prefix_len]);
                        any_needs_escape = true;
                        try Scalar.appendChunk(&buf, chunk);
                    }

                    remaining = remaining[ascii_vector_size..];
                }

                // pass #2: we found something that needed an escape
                // so we'll go ahead and copy the rest into the new buffer
                while (any_needs_escape and remaining.len >= ascii_vector_size) {
                    const chunk = remaining[0..ascii_vector_size];
                    if (Scalar.vectorNeedsEscape(chunk.*)) {
                        try Scalar.appendChunk(&buf, chunk);
                    } else {
                        try buf.appendSlice(chunk);
                    }

                    remaining = remaining[ascii_vector_size..];
                }
            }

            if (!any_needs_escape) {
                // Scalar scan of the tail, still without allocating.
                var tail_index: usize = 0;
                while (tail_index < remaining.len and Scalar.entityFor(remaining[tail_index]) == null) {
                    tail_index += 1;
                }

                if (tail_index == remaining.len) {
                    return latin1;
                }

                // Everything before the first escapable byte is copied verbatim.
                const prefix_len = (latin1.len - remaining.len) + tail_index;
                buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + @as(usize, Scalar.lengths[remaining[tail_index]]));
                buf.appendSliceAssumeCapacity(latin1[0..prefix_len]);
                any_needs_escape = true;
                remaining = remaining[tail_index..];
            }

            for (remaining) |c| {
                if (Scalar.entityFor(c)) |entity| {
                    try buf.appendSlice(entity);
                } else {
                    try buf.append(c);
                }
            }

            return buf.toOwnedSlice();
        },
    }
}
|
||||
|
||||
/// Escapes the five HTML-significant characters (`"`, `&`, `'`, `<`, `>`) in a
/// UTF-16 string. Code units >= 128 are copied through unchanged, one code
/// point (as measured by `utf16Codepoint`) at a time.
///
/// Return value:
/// - the input slice itself when nothing needs escaping (no allocation);
/// - a static literal for empty input or a single escaped character;
/// - otherwise a new buffer obtained from `allocator`.
///
/// Returns an error when an allocation fails.
pub fn escapeHTMLForUTF16Input(allocator: std.mem.Allocator, utf16: []const u16) ![]const u16 {
    const Scalar = struct {
        /// The characters that are replaced by an entity.
        const escape_chars = "\"&'<>";

        /// Output length (in code units) for every value below 256.
        pub const lengths: [std.math.maxInt(u8) + 1]u4 = brk: {
            var values: [std.math.maxInt(u8) + 1]u4 = undefined;
            for (values) |_, i| {
                values[i] = switch (i) {
                    '"' => "&quot;".len,
                    '&' => "&amp;".len,
                    '\'' => "&#x27;".len,
                    '<' => "&lt;".len,
                    '>' => "&gt;".len,
                    else => 1,
                };
            }

            break :brk values;
        };

        /// One splatted vector per escape character, used for the SIMD scan.
        const escape_vectors: [escape_chars.len]AsciiU16Vector = brk: {
            var vectors: [escape_chars.len]AsciiU16Vector = undefined;
            for (escape_chars) |c, i| {
                vectors[i] = @splat(ascii_u16_vector_size, @as(u16, c));
            }
            break :brk vectors;
        };

        /// Returns the entity that replaces `unit`, or null when `unit` is emitted as-is.
        inline fn entityFor(unit: u16) ?[]const u16 {
            return switch (unit) {
                '"' => toUTF16Literal("&quot;"),
                '&' => toUTF16Literal("&amp;"),
                '\'' => toUTF16Literal("&#x27;"), // modified from escape-html; used to be '&#39;'
                '<' => toUTF16Literal("&lt;"),
                '>' => toUTF16Literal("&gt;"),
                else => null,
            };
        }

        /// True when any lane of `vec` is an escape character or a unit above 127;
        /// such vectors have to be walked unit by unit.
        inline fn vectorNeedsScalarWalk(vec: AsciiU16Vector) bool {
            return @reduce(.Max, @bitCast(AsciiVectorU16U1, vec > @splat(ascii_u16_vector_size, @as(u16, 127))) |
                @bitCast(AsciiVectorU16U1, (vec == escape_vectors[0])) |
                @bitCast(AsciiVectorU16U1, (vec == escape_vectors[1])) |
                @bitCast(AsciiVectorU16U1, (vec == escape_vectors[2])) |
                @bitCast(AsciiVectorU16U1, (vec == escape_vectors[3])) |
                @bitCast(AsciiVectorU16U1, (vec == escape_vectors[4]))) == 1;
        }

        /// Width in code units of the code point at the start of `units`, clamped
        /// to `units.len` so callers can never step or slice past the end.
        inline fn codepointWidth(units: []const u16) usize {
            const cp = utf16Codepoint([]const u16, units);
            return @minimum(@as(usize, cp.len), units.len);
        }

        /// Scans `source` from index 0 while the index is below `stop`.
        /// Returns the index of the first escape character found; a result
        /// >= `stop` means none was found (a code point may carry the index
        /// past `stop`, but never past `source.len`).
        fn scanUntilEscape(source: []const u16, stop: usize) usize {
            var i: usize = 0;
            while (i < stop) {
                const unit = source[i];
                if (entityFor(unit) != null) return i;
                i += if (unit >= 128) codepointWidth(source[i..]) else 1;
            }
            return i;
        }

        /// Appends `source[start..]` to `buf`, escaping as it goes, until the index
        /// reaches `stop`. Returns the index reached, which may exceed `stop` when a
        /// code point straddles it but never exceeds `source.len`.
        fn escapeInto(buf: *std.ArrayList(u16), source: []const u16, start: usize, stop: usize) !usize {
            var i = start;
            while (i < stop) {
                const unit = source[i];
                if (entityFor(unit)) |entity| {
                    try buf.appendSlice(entity);
                    i += 1;
                } else if (unit >= 128) {
                    const width = codepointWidth(source[i..]);
                    try buf.appendSlice(source[i..][0..width]);
                    i += width;
                } else {
                    try buf.append(unit);
                    i += 1;
                }
            }
            return i;
        }
    };

    switch (utf16.len) {
        0 => return &[_]u16{},
        1 => return Scalar.entityFor(utf16[0]) orelse utf16,
        2 => {
            const first_entity = Scalar.entityFor(utf16[0]);
            const second_entity = Scalar.entityFor(utf16[1]);

            if (first_entity == null and second_entity == null) {
                return utf16;
            }

            const first_16 = first_entity orelse @as([]const u16, utf16[0..1]);
            const second_16 = second_entity orelse @as([]const u16, utf16[1..2]);

            var buf = try allocator.alloc(u16, first_16.len + second_16.len);
            std.mem.copy(u16, buf, first_16);
            std.mem.copy(u16, buf[first_16.len..], second_16);
            return buf;
        },

        else => {
            // `remaining` is always a suffix of `utf16`, so the number of units
            // already consumed is `utf16.len - remaining.len`.
            var remaining = utf16;

            var any_needs_escape = false;
            // Only valid once `any_needs_escape` is true.
            var buf: std.ArrayList(u16) = undefined;
            errdefer if (any_needs_escape) buf.deinit();

            if (comptime Environment.isAarch64 or Environment.isX64) {
                // pass #1: scan for any characters that need escaping
                // assume most strings won't need any escaping, so don't actually allocate the buffer
                while (!any_needs_escape and remaining.len >= ascii_u16_vector_size) {
                    if (!Scalar.vectorNeedsScalarWalk(remaining[0..ascii_u16_vector_size].*)) {
                        remaining = remaining[ascii_u16_vector_size..];
                        continue;
                    }

                    // The vector was flagged, but possibly only for non-ASCII units.
                    var i = Scalar.scanUntilEscape(remaining, ascii_u16_vector_size);
                    if (i < ascii_u16_vector_size) {
                        const prefix_len = (utf16.len - remaining.len) + i;
                        buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + 6);
                        buf.appendSliceAssumeCapacity(utf16[0..prefix_len]);
                        any_needs_escape = true;
                        i = try Scalar.escapeInto(&buf, remaining, i, ascii_u16_vector_size);
                    }

                    // edgecase: a code point may carry `i` past ascii_u16_vector_size
                    remaining = remaining[i..];
                }

                // pass #2: we found something that needed an escape,
                // so copy the rest of the vector-sized chunks into the new buffer
                while (any_needs_escape and remaining.len >= ascii_u16_vector_size) {
                    if (Scalar.vectorNeedsScalarWalk(remaining[0..ascii_u16_vector_size].*)) {
                        const consumed = try Scalar.escapeInto(&buf, remaining, 0, ascii_u16_vector_size);
                        remaining = remaining[consumed..];
                    } else {
                        try buf.appendSlice(remaining[0..ascii_u16_vector_size]);
                        remaining = remaining[ascii_u16_vector_size..];
                    }
                }
            }

            if (!any_needs_escape) {
                // Scalar scan of the tail, still without allocating.
                const i = Scalar.scanUntilEscape(remaining, remaining.len);
                if (i >= remaining.len) {
                    return utf16;
                }

                // Everything before the first escapable unit is copied verbatim.
                const prefix_len = (utf16.len - remaining.len) + i;
                buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + @as(usize, Scalar.lengths[remaining[i]]));
                buf.appendSliceAssumeCapacity(utf16[0..prefix_len]);
                any_needs_escape = true;
                remaining = remaining[i..];
            }

            _ = try Scalar.escapeInto(&buf, remaining, 0, remaining.len);

            return buf.toOwnedSlice();
        },
    }
}
|
||||
|
||||
test "copyLatin1IntoUTF8" {
|
||||
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
|
||||
var output = std.mem.zeroes([500]u8);
|
||||
@@ -1516,7 +2209,9 @@ pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
|
||||
pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
|
||||
pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
|
||||
pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
|
||||
pub const AsciiVectorSmall = std.meta.Vector(8, u8);
|
||||
pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
|
||||
pub const AsciiVectorU1Small = std.meta.Vector(8, u1);
|
||||
pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
|
||||
pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
|
||||
pub const max_4_ascii = @splat(4, @as(u8, 127));
|
||||
@@ -1703,9 +2398,10 @@ pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
|
||||
while (remaining.len >= ascii_vector_size) {
|
||||
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
|
||||
const cmp = vec == @splat(ascii_vector_size, char);
|
||||
const bitmask = @ptrCast(*const AsciiVectorInt, &cmp).*;
|
||||
const first = @ctz(AsciiVectorInt, bitmask);
|
||||
if (first < 16) {
|
||||
|
||||
if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) {
|
||||
const bitmask = @ptrCast(*const AsciiVectorInt, &cmp).*;
|
||||
const first = @ctz(AsciiVectorInt, bitmask);
|
||||
return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len));
|
||||
}
|
||||
remaining = remaining[ascii_vector_size..];
|
||||
@@ -2089,27 +2785,27 @@ test "firstNonASCII" {
|
||||
|
||||
test "firstNonASCII16" {
|
||||
@setEvalBranchQuota(99999);
|
||||
const yes = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
const yes = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
try std.testing.expectEqual(true, firstNonASCII16(@TypeOf(yes), yes) == null);
|
||||
|
||||
{
|
||||
@setEvalBranchQuota(99999);
|
||||
const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdoka🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
const no = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdoka🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
try std.testing.expectEqual(@as(u32, 50), firstNonASCII16(@TypeOf(no), no).?);
|
||||
}
|
||||
{
|
||||
@setEvalBranchQuota(99999);
|
||||
const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
const no = std.mem.span(toUTF16Literal("🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
try std.testing.expectEqual(@as(u32, 0), firstNonASCII16(@TypeOf(no), no).?);
|
||||
}
|
||||
{
|
||||
@setEvalBranchQuota(99999);
|
||||
const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("a🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
const no = std.mem.span(toUTF16Literal("a🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
|
||||
try std.testing.expectEqual(@as(u32, 1), firstNonASCII16(@TypeOf(no), no).?);
|
||||
}
|
||||
{
|
||||
@setEvalBranchQuota(99999);
|
||||
const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd12312🙂3"));
|
||||
const no = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd12312🙂3"));
|
||||
try std.testing.expectEqual(@as(u32, 366), firstNonASCII16(@TypeOf(no), no).?);
|
||||
}
|
||||
}
|
||||
@@ -2147,7 +2843,7 @@ pub fn formatUTF16(slice_: []align(1) const u16, writer: anytype) !void {
|
||||
|
||||
test "print UTF16" {
|
||||
var err = std.io.getStdErr();
|
||||
const utf16 = comptime std.unicode.utf8ToUtf16LeStringLiteral("❌ ✅ opkay ");
|
||||
const utf16 = comptime toUTF16Literal("❌ ✅ opkay ");
|
||||
try formatUTF16(utf16, err.writer());
|
||||
// std.unicode.fmtUtf16le(utf16le: []const u16)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user