Most of Buffer.toString

This commit is contained in:
Jarred Sumner
2022-04-26 01:12:28 -07:00
parent 6590d1f8bf
commit 77fbfb3fbb
10 changed files with 288 additions and 11 deletions

View File

@@ -0,0 +1,12 @@
import { describe, it, expect } from "bun:test";
it("buffer", () => {
  // Write a short ASCII string into a fresh buffer, then read it back
  // through the string decoders.
  const text = "hello world ";
  const buffer = new Buffer(1024);

  const written = buffer.write(text);
  expect(written).toBe(12);

  const asUtf8 = buffer.toString("utf8", 0, text.length);
  expect(asUtf8).toBe("hello world ");

  // 12 input bytes encode to 16 base64 characters with no padding and no
  // '+' or '/', so the URL-safe alphabet yields the same text as btoa().
  const asBase64Url = buffer.toString("base64url", 0, text.length);
  expect(asBase64Url).toBe(btoa("hello world "));

  // Buffer must remain a Uint8Array subclass.
  expect(buffer instanceof Uint8Array).toBe(true);
  expect(buffer instanceof Buffer).toBe(true);
});

View File

@@ -51,3 +51,5 @@ pub const urlsafe = std.base64.Base64DecoderWithIgnore.init(
null,
"= \t\r\n" ++ [_]u8{ std.ascii.control_code.VT, std.ascii.control_code.FF },
);
/// Encoder for the URL-safe base64 alphabet that emits no `=` padding
/// (alias of the standard library's `url_safe_no_pad` encoder).
pub const urlsafeEncoder = std.base64.url_safe_no_pad.Encoder;

View File

@@ -10,6 +10,14 @@
#include "BufferEncodingType.h"
#include "JavaScriptCore/GenericTypedArrayView.h"
// Byte-to-string decoders with C linkage, one per supported encoding.
// Each takes a pointer to `len` raw bytes plus the global object and returns
// an encoded JSValue. Callers in this codebase treat a non-string result as a
// value to throw.
extern "C" JSC__JSValue Bun__encoding__toStringUTF16(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringUTF8(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringASCII(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringLatin1(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringHex(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
extern "C" JSC__JSValue Bun__encoding__toStringURLSafeBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
namespace WebCore {
class Buffer final : public RefCounted<Buffer> {

View File

@@ -568,7 +568,107 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGl
// Implements Buffer.prototype.toString([encoding[, start[, end]]]).
//
// Arguments (all optional, positional):
//   encoding - one of the BufferEncodingType names; defaults to utf8 when
//              omitted or undefined. An unrecognized value throws a TypeError.
//   start    - byte index to begin decoding at; defaults to 0. A negative
//              value throws a TypeError.
//   end      - byte index to stop decoding at (exclusive); defaults to the
//              buffer length and is clamped to [0, buffer length] so the
//              decoders never read past the backing store.
//
// Returns the decoded string, or the empty string when the selected range is
// empty. If a decoder hands back a non-string value, that value is thrown.
static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{
    auto& vm = JSC::getVM(lexicalGlobalObject);

    const uint32_t byteLength = castedThis->length();
    if (byteLength == 0)
        return JSC::JSValue::encode(JSC::jsEmptyString(vm));

    auto scope = DECLARE_THROW_SCOPE(vm);

    uint32_t start = 0;
    uint32_t end = byteLength;
    WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8;

    // Parse each positional argument independently so that extra trailing
    // arguments never change how the first three are interpreted.
    const size_t argumentCount = callFrame->argumentCount();

    if (argumentCount > 0) {
        JSC::JSValue encodingValue = callFrame->uncheckedArgument(0);
        if (!encodingValue.isUndefined()) {
            std::optional<BufferEncodingType> parsed = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, encodingValue);
            RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
            if (!parsed) {
                throwTypeError(lexicalGlobalObject, scope, "Invalid encoding");
                return JSC::JSValue::encode(jsUndefined());
            }
            encoding = parsed.value();
        }
    }

    if (argumentCount > 1) {
        int32_t requestedStart = callFrame->uncheckedArgument(1).toInt32(lexicalGlobalObject);
        RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
        if (requestedStart < 0) {
            throwTypeError(lexicalGlobalObject, scope, "Offset must be a positive integer");
            return JSC::JSValue::encode(jsUndefined());
        }
        start = static_cast<uint32_t>(requestedStart);
    }

    if (argumentCount > 2) {
        JSC::JSValue endValue = callFrame->uncheckedArgument(2);
        if (!endValue.isUndefined()) {
            int32_t requestedEnd = endValue.toInt32(lexicalGlobalObject);
            RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
            // Clamp before converting to unsigned: a negative end must not wrap
            // to a huge length, and end may never exceed the buffer.
            end = requestedEnd < 0 ? 0 : std::min(static_cast<uint32_t>(requestedEnd), byteLength);
        }
    }

    if (UNLIKELY(start >= end)) {
        RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm)));
    }

    const uint32_t length = end - start;
    const uint8_t* input = castedThis->typedVector() + start;

    JSC::EncodedJSValue ret = 0;

    switch (encoding) {
    case WebCore::BufferEncodingType::buffer:
    case WebCore::BufferEncodingType::utf8: {
        ret = Bun__encoding__toStringUTF8(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::latin1: {
        // Use the dedicated latin1 entry point so latin1-specific decoding
        // lives in one place rather than piggybacking on the ASCII path.
        ret = Bun__encoding__toStringLatin1(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::ascii: {
        ret = Bun__encoding__toStringASCII(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::ucs2:
    case WebCore::BufferEncodingType::utf16le: {
        ret = Bun__encoding__toStringUTF16(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::base64: {
        ret = Bun__encoding__toStringBase64(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::base64url: {
        ret = Bun__encoding__toStringURLSafeBase64(input, length, lexicalGlobalObject);
        break;
    }

    case WebCore::BufferEncodingType::hex: {
        ret = Bun__encoding__toStringHex(input, length, lexicalGlobalObject);
        break;
    }

    default: {
        // Return immediately: falling through would decode the zero
        // placeholder in `ret` and throw it as a second, empty exception.
        throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen");
        return JSC::JSValue::encode(jsUndefined());
    }
    }

    // A decoder signals failure by returning a non-string value; surface it
    // to JavaScript as a thrown exception.
    JSC::JSValue retValue = JSC::JSValue::decode(ret);
    if (UNLIKELY(!retValue.isString())) {
        scope.throwException(lexicalGlobalObject, retValue);
        return JSC::JSValue::encode(jsUndefined());
    }

    RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue));
}
static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{

View File

@@ -3463,6 +3463,7 @@ pub const StringView = extern struct {
pub const WTF = struct {
extern fn WTF__copyLCharsFromUCharSource(dest: [*]u8, source: *const anyopaque, len: usize) void;
extern fn WTF__toBase64URLStringValue(bytes: [*]const u8, length: usize, globalObject: *JSGlobalObject) JSValue;
/// This uses SSE2 instructions and/or ARM NEON to copy 16-bit characters efficiently
/// See wtf/Text/ASCIIFastPath.h for details
@@ -3472,6 +3473,12 @@ pub const WTF = struct {
// This is any alignment
WTF__copyLCharsFromUCharSource(destination, source.ptr, source.len);
}
/// Encode a byte array to a URL-safe base64 string for use with JS
/// Memory is managed by JavaScriptCore instead of us
///
/// Forwards the slice's pointer and length, together with `globalObject`,
/// to the extern `WTF__toBase64URLStringValue` and returns its `JSValue`
/// unchanged.
pub fn toBase64URLStringValue(bytes: []const u8, globalObject: *JSGlobalObject) JSValue {
return WTF__toBase64URLStringValue(bytes.ptr, bytes.len, globalObject);
}
};
pub const Callback = struct {

View File

@@ -217,4 +217,5 @@ extern "C" int64_t Bun__encoding__writeLatin1AsUTF8(const unsigned char* ptr, si
extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
#endif

View File

@@ -1,6 +1,14 @@
#include "wtf-bindings.h"
#include "wtf/text/Base64.h"
// C-linkage shim that forwards directly to WTF::copyLCharsFromUCharSource,
// copying `length` characters from the UChar `source` into the LChar
// `destination`.
extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length)
{
WTF::copyLCharsFromUCharSource(destination, source, length);
}
// Encodes `length` bytes starting at `bytes` as URL-safe base64 and returns
// the result as a JavaScript string value created in `globalObject`'s VM.
//
// The WTF::String is handed to jsString() directly. The JS string takes its
// own reference to the underlying StringImpl, so no manual ref() is needed
// here; an unbalanced ref() would keep every encoded string alive forever.
extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject)
{
    WTF::String string = WTF::base64URLEncodeToString(reinterpret_cast<const LChar*>(bytes), static_cast<unsigned int>(length));
    return JSC::JSValue::encode(JSC::jsString(globalObject->vm(), string));
}

View File

@@ -3,4 +3,5 @@
#include "root.h"
#include "wtf/text/ASCIIFastPath.h"
extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length);
extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length);
extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject);

View File

@@ -688,11 +688,99 @@ pub const Encoder = struct {
return writeU8(input, len, to, to_len, .ascii);
}
// Exported entry points for turning raw bytes into a JavaScript string.
// Each one fixes the encoding at comptime and forwards to `toString`.

/// Decode `len` bytes at `input` using the utf16le encoding.
export fn Bun__encoding__toStringUTF16(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, JSC.Node.Encoding.utf16le);
}
/// Decode `len` bytes at `input` using the utf8 encoding.
export fn Bun__encoding__toStringUTF8(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .utf8);
}
/// Decode `len` bytes at `input` using the ascii encoding.
export fn Bun__encoding__toStringASCII(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .ascii);
}
/// Decode `len` bytes at `input` using the latin1 encoding.
export fn Bun__encoding__toStringLatin1(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .latin1);
}
/// Encode `len` bytes at `input` as a hex string.
export fn Bun__encoding__toStringHex(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .hex);
}
/// Encode `len` bytes at `input` as a base64 string.
export fn Bun__encoding__toStringBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .base64);
}
/// Encode `len` bytes at `input` as a URL-safe base64 string.
export fn Bun__encoding__toStringURLSafeBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
return toString(input, len, globalObject, .base64url);
}
// pub fn writeUTF16AsUTF8(utf16: [*]const u16, len: usize, to: [*]u8, to_len: usize) callconv(.C) i32 {
// return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, utf16[0..len]).written);
// }
// pub fn toString(input: [*]const u8, len: usize, zig_str: *ZigString, comptime encoding: JSC.Node.Encoding) callconv(.C) i64 {}
/// Convert `len` raw bytes starting at `input_ptr` into a JavaScript string
/// value, interpreting them according to the comptime-known `encoding`.
///
/// Returns the empty string for zero-length input. When an intermediate
/// allocation fails, an "Out of memory" error instance is returned in place
/// of a string, so callers must check that the result is a string.
pub fn toString(input_ptr: [*]const u8, len: usize, global: *JSGlobalObject, comptime encoding: JSC.Node.Encoding) JSValue {
    if (len == 0)
        return ZigString.Empty.toValue(global);

    const input = input_ptr[0..len];
    const allocator = VirtualMachine.vm.allocator;

    switch (comptime encoding) {
        .latin1, .ascii => {
            var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global);

            @memcpy(to.ptr, input_ptr, to.len);

            // Clear the high bit of every byte so the result is 7-bit clean.
            // Hoping this gets auto vectorized
            // NOTE(review): this also rewrites latin1 bytes 0x80-0xFF — confirm that is intended.
            for (to[0..to.len]) |c, i| {
                to[i] = @as(u8, @truncate(u7, c));
            }

            return ZigString.init(to).toExternalValue(global);
        },
        .buffer, .utf8 => {
            // JSC only supports UTF-16 strings for non-ascii text
            const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global);
            if (converted) |utf16| {
                return ZigString.toExternalU16(utf16.ptr, utf16.len, global);
            }

            // If we get here, it means we can safely assume the string is 100% ASCII characters
            // For this, we rely on the GC to manage the memory to minimize potential for memory leaks
            return ZigString.init(input).toValueGC(global);
        },
        JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
            // The input already is UTF-16LE: every pair of bytes is one code
            // unit. Running it through the UTF-8 decoder would misread it, and
            // the result holds len / 2 code units, not len. A trailing odd
            // byte cannot form a code unit and is dropped.
            const code_unit_count = len / 2;
            if (code_unit_count == 0)
                return ZigString.Empty.toValue(global);

            var output = allocator.alloc(u16, code_unit_count) catch return ZigString.init("Out of memory").toErrorInstance(global);

            // Assemble each code unit explicitly as little-endian so the result
            // does not depend on input alignment or host byte order.
            var i: usize = 0;
            while (i < code_unit_count) : (i += 1) {
                const low = @as(u16, input[i * 2]);
                const high = @as(u16, input[i * 2 + 1]);
                output[i] = low | (high << 8);
            }

            return ZigString.toExternalU16(output.ptr, output.len, global);
        },

        JSC.Node.Encoding.hex => {
            // Two hex digits per input byte.
            var output = allocator.alloc(u8, input.len * 2) catch return ZigString.init("Out of memory").toErrorInstance(global);
            const wrote = strings.encodeBytesToHex(output, input);
            std.debug.assert(wrote == output.len);
            var val = ZigString.init(output);
            // NOTE(review): mark() presumably tags the string's memory ownership — confirm.
            val.mark();
            return val.toExternalValue(global);
        },

        JSC.Node.Encoding.base64url => {
            // Encoded by WTF; the resulting string's memory is owned by JavaScriptCore.
            return JSC.WTF.toBase64URLStringValue(input, global);
        },

        JSC.Node.Encoding.base64 => {
            const to_len = bun.base64.encodeLen(input);
            var to = allocator.alloc(u8, to_len) catch return ZigString.init("Out of memory").toErrorInstance(global);
            const wrote = bun.base64.encode(to, input);
            return ZigString.init(to[0..wrote]).toExternalValue(global);
        },
    }
}
pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
if (len == 0 or to_len == 0)
@@ -706,14 +794,26 @@ pub const Encoder = struct {
// if (comptime encoding.isBinaryToText()) {}
switch (comptime encoding) {
JSC.Node.Encoding.ascii => {
const written = @truncate(u32, @minimum(len, to_len));
JSC.Node.Encoding.buffer => {
const written = @minimum(len, to_len);
@memcpy(to, input, written);
return @intCast(i32, written);
return @intCast(i64, written);
},
.latin1, .ascii => {
const written = @minimum(len, to_len);
@memcpy(to, input, written);
// Hoping this gets auto vectorized
for (to[0..written]) |c, i| {
to[i] = @as(u8, @truncate(u7, c));
}
return @intCast(i64, written);
},
.utf8 => {
// need to encode
return @intCast(i32, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written);
return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written);
},
// encode latin1 into UTF16
JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
@@ -762,7 +862,7 @@ pub const Encoder = struct {
return @intCast(i64, bun.base64.decode(to[0..outlen], slice).written);
},
else => return 0,
// else => return 0,
}
}
@@ -780,7 +880,7 @@ pub const Encoder = struct {
return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written);
},
// string is already encoded, just need to copy the data
JSC.Node.Encoding.ucs2, JSC.Node.Encoding.ascii, JSC.Node.Encoding.utf16le => {
JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => {
strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]);
return @intCast(i64, @minimum(len, to_len));
@@ -868,6 +968,14 @@ pub const Encoder = struct {
_ = Bun__encoding__writeUTF16AsUTF8;
_ = Bun__encoding__writeLatin1AsASCII;
_ = Bun__encoding__writeUTF16AsASCII;
_ = Bun__encoding__toStringUTF16;
_ = Bun__encoding__toStringUTF8;
_ = Bun__encoding__toStringASCII;
_ = Bun__encoding__toStringLatin1;
_ = Bun__encoding__toStringHex;
_ = Bun__encoding__toStringBase64;
_ = Bun__encoding__toStringURLSafeBase64;
}
}
};

View File

@@ -4,6 +4,7 @@ const Environment = @import("./env.zig");
const string = @import("string_types.zig").string;
const stringZ = @import("string_types.zig").stringZ;
const CodePoint = @import("string_types.zig").CodePoint;
const bun = @import("global.zig");
const assert = std.debug.assert;
pub inline fn containsChar(self: string, char: u8) bool {
return indexOfChar(self, char) != null;
@@ -809,9 +810,9 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp
const strings = @This();
/// If there are non-ascii characters in the string, this encodes UTF-8 into a new UTF-16 string.
/// Convert a UTF-8 string to a UTF-16 string IF there are any non-ascii characters
/// If there are no non-ascii characters, this returns null
/// This is intended to be used for strings that go to
/// This is intended to be used for strings that go to JavaScript
pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 {
if (strings.firstNonASCII(bytes)) |i| {
const ascii = bytes[0..i];
@@ -1715,6 +1716,22 @@ pub fn decodeHexToBytes(destination: []u8, comptime Char: type, source: []const
return destination.len - remain.len;
}
/// Write the lowercase hexadecimal form of `source` into `destination` and
/// return the number of bytes written.
///
/// Each source byte becomes two output characters. When `destination` is too
/// small to hold all of `source`, only as many whole bytes as fit are encoded
/// (an odd trailing slot in `destination` is left untouched). Empty `source`
/// or `destination` writes nothing and returns 0.
pub fn encodeBytesToHex(destination: []u8, source: []const u8) usize {
    const hex_digits = "0123456789abcdef";

    // Number of whole source bytes that fit; computed by halving the
    // destination length so that `source.len * 2` can never overflow.
    const capacity = destination.len / 2;
    const to_read = if (source.len < capacity) source.len else capacity;

    var i: usize = 0;
    while (i < to_read) : (i += 1) {
        const byte = source[i];
        destination[i * 2] = hex_digits[byte >> 4];
        destination[i * 2 + 1] = hex_digits[byte & 0x0f];
    }

    return to_read * 2;
}
test "decodeHexToBytes" {
var buffer = std.mem.zeroes([1024]u8);
for (buffer) |_, i| {
@@ -1730,6 +1747,19 @@ test "decodeHexToBytes" {
try std.testing.expectEqualSlices(u8, &buffer, ours_buf[0..ours]);
}
// test "formatBytesToHex" {
// var buffer = std.mem.zeroes([1024]u8);
// for (buffer) |_, i| {
// buffer[i] = @truncate(u8, i % 256);
// }
// var written: [2048]u8 = undefined;
// var hex = std.fmt.bufPrint(&written, "{}", .{std.fmt.fmtSliceHexLower(&buffer)}) catch unreachable;
// var ours_buf: [4096]u8 = undefined;
// // var ours = formatBytesToHex(&ours_buf, &buffer);
// // try std.testing.expectEqualSlices(u8, match, ours_buf[0..ours]);
// try std.testing.expectEqualSlices(u8, &buffer, ours_buf[0..ours]);
// }
pub fn trimLeadingChar(slice: []const u8, char: u8) []const u8 {
if (indexOfNotChar(slice, char)) |i| {
return slice[i..];