some of buffer

This commit is contained in:
Jarred Sumner
2022-04-25 07:09:18 -07:00
parent 4b4df5095e
commit 2c6e5c3fc4
43 changed files with 3645 additions and 87 deletions

View File

@@ -719,10 +719,18 @@ pub fn copyU8IntoU16WithAlignment(comptime alignment: u21, output_: []align(alig
if (comptime Environment.allow_assert) {
std.debug.assert(input.len <= output.len);
}
while (input.len >= word) {
appendUTF8MachineWordToUTF16MachineWordUnaligned(alignment, output[0..word], input[0..word]);
output = output[word..];
input = input[word..];
// un-aligned data access is slow
// so we attempt to align the data
while (!std.mem.isAligned(@ptrToInt(output.ptr), @alignOf(u16)) and input.len >= word) {
output[0] = input[0];
output = output[1..];
input = input[1..];
}
if (std.mem.isAligned(@ptrToInt(output.ptr), @alignOf(u16)) and input.len > 0) {
copyU8IntoU16(@alignCast(@alignOf(u16), output.ptr)[0..output.len], input);
return;
}
for (input) |c, i| {
@@ -758,28 +766,33 @@ pub fn copyU8IntoU16WithAlignment(comptime alignment: u21, output_: []align(alig
// }
/// Narrow the UTF-16 code units in `input_` into bytes stored in `output_`.
///
/// `output_` must be at least as long as `input_`; this is asserted when
/// `Environment.allow_assert` is set. Unless `JSC.is_bindgen` is set, the copy
/// is delegated to `JSC.WTF.copyLCharsFromUCharSource`; otherwise a plain Zig
/// loop performs it.
pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: InputType) void {
    if (comptime Environment.allow_assert) {
        std.debug.assert(input_.len <= output_.len);
    }

    if (comptime !JSC.is_bindgen) {
        JSC.WTF.copyLCharsFromUCharSource(output_.ptr, InputType, input_);
    } else {
        var output = output_;
        var input = input_;

        // on X64, this is 4
        // on WASM, this is 2
        const machine_word_length = comptime @sizeOf(usize) / @sizeOf(u16);

        // Convert one machine word's worth of code units per iteration; the
        // inner loop is unrolled at comptime.
        // NOTE(review): @intCast assumes every code unit fits in a u8 —
        // confirm callers only pass Latin-1-range input.
        while (input.len >= machine_word_length) {
            comptime var machine_word_i: usize = 0;
            inline while (machine_word_i < machine_word_length) : (machine_word_i += 1) {
                output[machine_word_i] = @intCast(u8, input[machine_word_i]);
            }

            output = output[machine_word_length..];
            input = input[machine_word_length..];
        }

        // Remaining code units that did not fill a whole machine word.
        for (input) |c, i| {
            output[i] = @intCast(u8, c);
        }
    }
}
@@ -972,7 +985,7 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
if (first < 16) {
latin1 = latin1[(comptime count * ascii_vector_size)..];
list.items.len += (comptime count * ascii_vector_size);
try list.appendSlice(latin1[0..first]);
list.appendSliceAssumeCapacity(latin1[0..first]);
latin1 = latin1[first..];
break_outer = true;
break :outer;
@@ -1152,6 +1165,29 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
};
}
const JSC = @import("javascript_core");
/// Copy Latin-1 bytes from `latin1_` into UTF-16 code units in `buf_`.
///
/// Copying stops when either the input is exhausted or `buf_` is full.
/// ASCII runs are copied with `strings.copyU8IntoU16WithAlignment`; each
/// non-ASCII byte is widened to the code unit with the same numeric value,
/// which is how Latin-1 (U+0080..U+00FF) is represented in UTF-16.
///
/// Returns the number of Latin-1 bytes consumed in `.read` and the number of
/// UTF-16 code units stored in `.written`.
pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
    var buf = buf_;
    var latin1 = latin1_;

    while (buf.len > 0 and latin1.len > 0) {
        // Length of the leading ASCII run, clamped to the space left in `buf`
        // so neither the copy nor the re-slicing below can run past its end.
        const ascii_len: usize = strings.firstNonASCII(latin1) orelse latin1.len;
        const to_write = if (ascii_len < buf.len) ascii_len else buf.len;

        strings.copyU8IntoU16WithAlignment(std.meta.alignment(Buffer), buf, latin1[0..to_write]);
        latin1 = latin1[to_write..];
        buf = buf[to_write..];

        // Reaching here with input and space both remaining means the run
        // ended on a non-ASCII byte. Consuming it guarantees forward progress
        // on every iteration.
        if (latin1.len > 0 and buf.len > 0) {
            buf[0] = latin1[0];
            latin1 = latin1[1..];
            buf = buf[1..];
        }
    }

    return .{
        .read = @truncate(u32, latin1_.len - latin1.len),
        .written = @truncate(u32, buf_.len - buf.len),
    };
}
test "copyLatin1IntoUTF8" {
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
var output = std.mem.zeroes([500]u8);
@@ -1172,13 +1208,21 @@ pub fn latin1ToCodepointAssumeNotASCII(char: u8, comptime CodePointType: type) C
);
}
/// Encode `char` as a two-byte UTF-8 sequence, returning each byte as a `Char`.
///
/// The first element is `0xc0 | (char >> 6)` and the second is
/// `0x80 | (char & 0x3f)`, each truncated to 8 bits before being converted to
/// `Char`. As the name says, `char` is assumed not to be ASCII; the function
/// does not check this.
pub fn latin1ToCodepointBytesAssumeNotASCIIWIthCharType(comptime Char: type, char: u32) [2]Char {
    return [2]Char{
        @as(Char, @truncate(u8, 0xc0 | char >> 6)),
        @as(Char, @truncate(u8, 0x80 | (char & 0x3f))),
    };
}
/// Two-byte UTF-8 encoding of `char`, as produced by
/// `latin1ToCodepointBytesAssumeNotASCIIWIthCharType` with `u8` elements.
pub fn latin1ToCodepointBytesAssumeNotASCII(char: u32) [2]u8 {
    const utf8_pair: [2]u8 = latin1ToCodepointBytesAssumeNotASCIIWIthCharType(u8, char);
    return utf8_pair;
}
/// Same two values as `latin1ToCodepointBytesAssumeNotASCII`, but each one is
/// returned in a `u16` element.
///
/// NOTE(review): the elements hold UTF-8 byte values widened to u16, not a
/// UTF-16 encoding of `char` — confirm that is what callers expect.
pub fn latin1ToCodepointBytesAssumeNotASCII16(char: u32) [2]u16 {
    const widened_pair: [2]u16 = latin1ToCodepointBytesAssumeNotASCIIWIthCharType(u16, char);
    return widened_pair;
}
pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeIntoResult {
var remaining = buf;
var utf16_remaining = utf16;
@@ -1661,11 +1705,72 @@ pub fn indexOfNotChar(slice: []const u8, char: u8) ?u32 {
return null;
}
/// Value stored in `hex_table` for bytes that are not hexadecimal digits.
const hex_table_invalid: u8 = 255;

/// Maps a byte to the value of the hexadecimal digit it spells (0-15), or to
/// `hex_table_invalid` when it is not one of `0-9`, `a-f`, `A-F`.
/// The table has 256 entries so that every `u8` is a valid index.
const hex_table: [256]u8 = brk: {
    var values = [_]u8{hex_table_invalid} ** 256;

    var c: u8 = '0';
    while (c <= '9') : (c += 1) values[c] = c - '0';

    c = 'a';
    while (c <= 'f') : (c += 1) {
        const digit = c - 'a' + 10;
        values[c] = digit;
        values[c - 'a' + 'A'] = digit;
    }

    break :brk values;
};

/// Decode pairs of hexadecimal digits from `source` into bytes in `destination`.
///
/// Decoding stops at the first pair containing a non-hex character, when
/// `destination` is full, or when fewer than two characters of input remain.
/// Returns the number of bytes written to `destination`.
pub fn decodeHexToBytes(destination: []u8, comptime Char: type, source: []const Char) usize {
    var remain = destination;
    var input = source;

    while (input.len > 1 and remain.len > 0) {
        const hi_char = input[0];
        const lo_char = input[1];

        // Code units wider than a byte must be rejected before the lookup;
        // truncating them could make them alias an ASCII hex digit.
        if (hi_char > std.math.maxInt(u8) or lo_char > std.math.maxInt(u8)) break;

        const a = hex_table[hi_char];
        const b = hex_table[lo_char];
        if (a == hex_table_invalid or b == hex_table_invalid) {
            break;
        }

        remain[0] = a << 4 | b;
        remain = remain[1..];
        input = input[2..];
    }

    return destination.len - remain.len;
}
// Round-trips 1024 bytes through lowercase hex and checks that
// `decodeHexToBytes` agrees with `std.fmt.hexToBytes` and with the input.
test "decodeHexToBytes" {
    var buffer = std.mem.zeroes([1024]u8);
    for (buffer) |_, i| {
        buffer[i] = @truncate(u8, i % 256);
    }

    // Two hex characters per input byte.
    var written: [2048]u8 = undefined;
    var hex = std.fmt.bufPrint(&written, "{}", .{std.fmt.fmtSliceHexLower(&buffer)}) catch unreachable;

    var good: [4096]u8 = undefined;
    var ours_buf: [4096]u8 = undefined;
    var match = try std.fmt.hexToBytes(good[0..1024], hex);
    // `decodeHexToBytes` takes the character type as its second argument.
    var ours = decodeHexToBytes(&ours_buf, u8, hex);

    try std.testing.expectEqualSlices(u8, match, ours_buf[0..ours]);
    try std.testing.expectEqualSlices(u8, &buffer, ours_buf[0..ours]);
}
/// Return `slice` starting at the index reported by `indexOfNotChar(slice, char)`,
/// or an empty string when that call returns null.
pub fn trimLeadingChar(slice: []const u8, char: u8) []const u8 {
    const start = indexOfNotChar(slice, char) orelse return "";
    return slice[start..];
}
@@ -2028,9 +2133,10 @@ pub fn containsNonBmpCodePoint(text: string) bool {
}
// this is std.mem.trim except it doesn't forcibly change the slice to be const
pub fn trim(slice: anytype, values_to_strip: []const u8) @TypeOf(slice) {
pub fn trim(slice: anytype, comptime values_to_strip: []const u8) @TypeOf(slice) {
var begin: usize = 0;
var end: usize = slice.len;
while (begin < end and std.mem.indexOfScalar(u8, values_to_strip, slice[begin]) != null) : (begin += 1) {}
while (end > begin and std.mem.indexOfScalar(u8, values_to_strip, slice[end - 1]) != null) : (end -= 1) {}
return slice[begin..end];