[bun.js] Implement Buffer.byteLength

This commit is contained in:
Jarred Sumner
2022-05-10 19:09:28 -07:00
parent b3760cd723
commit 8def37c14e
7 changed files with 443 additions and 47 deletions

View File

@@ -360,7 +360,108 @@ static inline JSC::EncodedJSValue jsBufferConstructorFunction_allocUnsafeSlowBod
// Implements the static `Buffer.byteLength(value[, encoding])` function.
//
// - Throws a TypeError when called without arguments.
// - For an ArrayBufferView argument, returns the view's byteLength.
// - Otherwise converts the argument to a string and returns the value reported
//   by the Bun__encoding__byteLength* helper matching the requested encoding
//   (utf8 when no encoding string is supplied). An empty string yields 0
//   before the encoding argument is inspected.
// - Throws a TypeError when the encoding argument is a string that does not
//   name a known BufferEncodingType.
static inline JSC::EncodedJSValue jsBufferConstructorFunction_byteLengthBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{
    auto& vm = JSC::getVM(lexicalGlobalObject);
    auto scope = DECLARE_THROW_SCOPE(vm);

    // The receiver is not consulted: the result depends only on the arguments.
    (void)castedThis;

    if (UNLIKELY(callFrame->argumentCount() == 0)) {
        throwTypeError(lexicalGlobalObject, scope, "Not enough arguments"_s);
        return JSC::JSValue::encode(jsUndefined());
    }

    EnsureStillAliveScope arg0 = callFrame->argument(0);
    auto input = arg0.value();

    // Typed arrays / DataViews already know their size in bytes.
    if (JSC::JSArrayBufferView* view = JSC::jsDynamicCast<JSC::JSArrayBufferView*>(input)) {
        RELEASE_AND_RETURN(scope, JSValue::encode(JSC::jsNumber(view->byteLength())));
    }

    // NOTE(review): toStringOrNull presumably returns null only when the
    // conversion itself threw, in which case an exception may already be
    // pending when the TypeError below is raised -- confirm against JSC.
    auto* str = input.toStringOrNull(lexicalGlobalObject);
    if (!str) {
        throwTypeError(lexicalGlobalObject, scope, "byteLength() expects a string"_s);
        return JSC::JSValue::encode(jsUndefined());
    }

    EnsureStillAliveScope arg1 = callFrame->argument(1);

    if (str->length() == 0)
        return JSC::JSValue::encode(JSC::jsNumber(0));

    WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8;
    if (callFrame->argumentCount() > 1) {
        // Non-string encoding arguments are ignored and utf8 is used.
        if (arg1.value().isString()) {
            std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1.value());
            if (!encoded) {
                throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s);
                return JSC::JSValue::encode(jsUndefined());
            }

            encoding = encoded.value();
        }
    }

    auto view = str->tryGetValue(lexicalGlobalObject);
    // Materializing the string's characters can fail; do not touch the
    // character pointers below if it did.
    RETURN_IF_EXCEPTION(scope, {});

    int64_t written = 0;

    // Each encoding has one helper for 8-bit (Latin-1) storage and one for
    // 16-bit (UTF-16) storage.
    switch (encoding) {
    case WebCore::BufferEncodingType::utf8: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsUTF8(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsUTF8(view.characters16(), view.length());
        }
        break;
    }

    case WebCore::BufferEncodingType::latin1:
    case WebCore::BufferEncodingType::ascii: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsASCII(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsASCII(view.characters16(), view.length());
        }
        break;
    }

    case WebCore::BufferEncodingType::ucs2:
    case WebCore::BufferEncodingType::utf16le: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsUTF16(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsUTF16(view.characters16(), view.length());
        }
        break;
    }

    case WebCore::BufferEncodingType::base64: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsBase64(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsBase64(view.characters16(), view.length());
        }
        break;
    }

    case WebCore::BufferEncodingType::base64url: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsURLSafeBase64(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsURLSafeBase64(view.characters16(), view.length());
        }
        break;
    }

    case WebCore::BufferEncodingType::hex: {
        if (view.is8Bit()) {
            written = Bun__encoding__byteLengthLatin1AsHex(view.characters8(), view.length());
        } else {
            written = Bun__encoding__byteLengthUTF16AsHex(view.characters16(), view.length());
        }
        break;
    }
    }

    RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(written)));
}
static inline JSC::EncodedJSValue jsBufferConstructorFunction_compareBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)

View File

@@ -1,4 +1,4 @@
//-- AUTOGENERATED FILE -- 1651982852
//-- AUTOGENERATED FILE -- 1652089399
// clang-format off
#pragma once

View File

@@ -218,6 +218,19 @@ extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len,
extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsHex(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsHex(const UChar* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsURLSafeBase64(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsURLSafeBase64(const UChar* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsBase64(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsBase64(const UChar* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF16(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF16(const UChar* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF8(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF8(const UChar* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthLatin1AsASCII(const unsigned char* ptr, size_t len);
extern "C" size_t Bun__encoding__byteLengthUTF16AsASCII(const UChar* ptr, size_t len);
extern "C" int64_t Bun__encoding__constructFromLatin1AsHex(void*, const unsigned char* ptr, size_t len);
extern "C" int64_t Bun__encoding__constructFromUTF16AsHex(void*, const UChar* ptr, size_t len);
extern "C" int64_t Bun__encoding__constructFromLatin1AsURLSafeBase64(void*, const unsigned char* ptr, size_t len);

View File

@@ -1,5 +1,5 @@
// clang-format: off
//-- AUTOGENERATED FILE -- 1651982852
//-- AUTOGENERATED FILE -- 1652089399
#pragma once
#include <stddef.h>

View File

@@ -55,13 +55,12 @@ namespace Napi {
JSC::SourceCode generateSourceCode(WTF::String keyString, JSC::VM& vm, JSC::JSObject* object, JSC::JSGlobalObject* globalObject)
{
JSC::gcProtect(object);
JSC::JSArray* exportKeys = ownPropertyKeys(globalObject, object, PropertyNameMode::StringsAndSymbols, DontEnumPropertiesMode::Include, std::nullopt);
auto symbol = vm.symbolRegistry().symbolForKey("__BunTemporaryGlobal"_s);
JSC::Identifier ident = JSC::Identifier::fromUid(symbol);
JSC::Identifier ident = JSC::Identifier::fromString(vm, "__BunTemporaryGlobal"_s);
WTF::StringBuilder sourceCodeBuilder = WTF::StringBuilder();
// TODO: handle symbol collision
sourceCodeBuilder.append("var $$TempSymbol = Symbol.for('__BunTemporaryGlobal'), $$NativeModule = globalThis[$$TempSymbol]; globalThis[$$TempSymbol] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s);
sourceCodeBuilder.append("\nvar $$NativeModule = globalThis['__BunTemporaryGlobal']; console.log($$NativeModule); globalThis['__BunTemporaryGlobal'] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s);
for (unsigned i = 0; i < exportKeys->length(); i++) {
auto key = exportKeys->getIndexQuickly(i);
@@ -207,17 +206,32 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
}
WTF::String nameStr;
if (property.utf8name != nullptr) {
nameStr = WTF::String::fromUTF8(property.utf8name);
nameStr = WTF::String::fromUTF8(property.utf8name).isolatedCopy();
} else if (property.name) {
nameStr = toJS(property.name).toWTFString(globalObject);
nameStr = toJS(property.name).toWTFString(globalObject).isolatedCopy();
}
auto propertyName = JSC::PropertyName(JSC::Identifier::fromString(vm, nameStr));
if (property.method) {
auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, reinterpret_cast<Zig::FFIFunction>(property.method));
function->dataPtr = dataPtr;
JSC::JSValue value = JSC::JSValue(function);
JSC::JSValue value;
auto method = reinterpret_cast<Zig::FFIFunction>(property.method);
if (!dataPtr) {
JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
JSC::MarkedArgumentBuffer values;
values.append(callFrame->thisValue());
for (int i = 0; i < callFrame->argumentCount(); i++) {
values.append(callFrame->argument(i));
}
return method(globalObject, callFrame);
});
value = JSC::JSValue(func);
} else {
auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, method);
function->dataPtr = dataPtr;
value = JSC::JSValue(function);
}
to->putDirect(vm, propertyName, value, getPropertyAttributes(property) | JSC::PropertyAttribute::Function);
return;
@@ -233,6 +247,8 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
if (getterProperty) {
JSC::JSNativeStdFunction* getterFunction = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 0, String(), [getterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
JSC::MarkedArgumentBufferWithSize values;
values.append(callFrame->thisValue());
return getterProperty(globalObject, callFrame);
});
getter = getterFunction;
@@ -247,8 +263,10 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
if (setterProperty) {
JSC::JSNativeStdFunction* setterFunction = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 1, String(), [setterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
setterProperty(globalObject, callFrame);
return JSC::JSValue::encode(JSC::jsBoolean(true));
JSC::MarkedArgumentBufferWithSize values;
values.append(callFrame->thisValue());
values.append(callFrame->uncheckedArgument(0));
return setterProperty(globalObject, callFrame);
});
setter = setterFunction;
} else {
@@ -500,9 +518,9 @@ extern "C" napi_status napi_wrap(napi_env env,
extern "C" napi_status napi_unwrap(napi_env env, napi_value js_object,
void** result)
{
// if (!toJS(js_object).isObject()) {
// return NAPI_OBJECT_EXPECTED;
// }
if (!toJS(js_object).isObject()) {
return NAPI_OBJECT_EXPECTED;
}
auto* globalObject = toJS(env);
auto& vm = globalObject->vm();
auto* object = JSC::jsDynamicCast<NapiPrototype*>(toJS(js_object));
@@ -521,12 +539,27 @@ extern "C" napi_status napi_create_function(napi_env env, const char* utf8name,
{
Zig::GlobalObject* globalObject = toJS(env);
JSC::VM& vm = globalObject->vm();
auto name = WTF::String::fromUTF8(utf8name, length);
auto name = WTF::String::fromUTF8(utf8name, length == NAPI_AUTO_LENGTH ? strlen(utf8name) : length).isolatedCopy();
auto method = reinterpret_cast<Zig::FFIFunction>(cb);
if (data) {
auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, method);
function->dataPtr = data;
*result = toNapi(JSC::JSValue(function));
} else {
JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
JSC::MarkedArgumentBuffer values;
values.append(callFrame->thisValue());
for (int i = 0; i < callFrame->argumentCount(); i++) {
values.append(callFrame->argument(i));
}
return method(globalObject, callFrame);
});
*result = toNapi(JSC::JSValue(func));
}
// std::cout << "napi_create_function: " << utf8name << std::endl;
auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, reinterpret_cast<Zig::FFIFunction>(cb));
function->dataPtr = data;
JSC::JSValue functionValue = JSC::JSValue(function);
*reinterpret_cast<JSC::EncodedJSValue*>(result) = JSC::JSValue::encode(functionValue);
return napi_ok;
}
@@ -559,8 +592,9 @@ extern "C" napi_status napi_get_cb_info(
}
}
JSC::JSValue thisValue = callFrame->thisValue();
if (this_arg != nullptr) {
JSC::JSValue thisValue = callFrame->thisValue();
*this_arg = toNapi(thisValue);
}
@@ -568,8 +602,14 @@ extern "C" napi_status napi_get_cb_info(
JSC::JSValue callee = JSC::JSValue(callFrame->jsCallee());
if (Zig::JSFFIFunction* ffiFunction = JSC::jsDynamicCast<Zig::JSFFIFunction*>(callee)) {
*data = reinterpret_cast<void*>(ffiFunction->dataPtr);
} else if (NapiPrototype* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) {
} else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) {
*data = proto->napiRef ? proto->napiRef->data : nullptr;
} else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(callee)) {
*data = proto->dataPtr;
} else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(thisValue)) {
*data = proto->napiRef ? proto->napiRef->data : nullptr;
} else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(thisValue)) {
*data = proto->dataPtr;
} else {
*data = nullptr;
}
@@ -595,6 +635,8 @@ napi_define_properties(napi_env env, napi_value object, size_t property_count,
void* inheritedDataPtr = nullptr;
if (NapiPrototype* proto = jsDynamicCast<NapiPrototype*>(objectValue)) {
inheritedDataPtr = proto->napiRef ? proto->napiRef->data : nullptr;
} else if (NapiClass* proto = jsDynamicCast<NapiClass*>(objectValue)) {
inheritedDataPtr = proto->dataPtr;
}
for (size_t i = 0; i < property_count; i++) {
@@ -980,6 +1022,55 @@ static JSC_DEFINE_HOST_FUNCTION(NapiClass_ConstructorFunction,
callFrame->setThisValue(prototype->subclass(newTarget));
napi->constructor()(globalObject, callFrame);
size_t count = callFrame->argumentCount();
switch (count) {
case 0: {
break;
}
case 1: {
JSC::ensureStillAliveHere(callFrame->argument(0));
break;
}
case 2: {
JSC::ensureStillAliveHere(callFrame->argument(0));
JSC::ensureStillAliveHere(callFrame->argument(1));
break;
}
case 3: {
JSC::ensureStillAliveHere(callFrame->argument(0));
JSC::ensureStillAliveHere(callFrame->argument(1));
JSC::ensureStillAliveHere(callFrame->argument(2));
break;
}
case 4: {
JSC::ensureStillAliveHere(callFrame->argument(0));
JSC::ensureStillAliveHere(callFrame->argument(1));
JSC::ensureStillAliveHere(callFrame->argument(2));
JSC::ensureStillAliveHere(callFrame->argument(3));
break;
}
case 5: {
JSC::ensureStillAliveHere(callFrame->argument(0));
JSC::ensureStillAliveHere(callFrame->argument(1));
JSC::ensureStillAliveHere(callFrame->argument(2));
JSC::ensureStillAliveHere(callFrame->argument(3));
JSC::ensureStillAliveHere(callFrame->argument(4));
break;
}
default: {
JSC::ensureStillAliveHere(callFrame->argument(0));
JSC::ensureStillAliveHere(callFrame->argument(1));
JSC::ensureStillAliveHere(callFrame->argument(2));
JSC::ensureStillAliveHere(callFrame->argument(3));
JSC::ensureStillAliveHere(callFrame->argument(4));
JSC::ensureStillAliveHere(callFrame->argument(5));
for (int i = 6; i < count; i++) {
JSC::ensureStillAliveHere(callFrame->argument(i));
}
break;
}
}
RETURN_IF_EXCEPTION(scope, {});
RELEASE_AND_RETURN(scope, JSValue::encode(callFrame->thisValue()));
@@ -992,7 +1083,7 @@ NapiClass* NapiClass::create(VM& vm, Zig::GlobalObject* globalObject, const char
size_t property_count,
const napi_property_descriptor* properties)
{
WTF::String name = WTF::String::fromUTF8(utf8name, length);
WTF::String name = WTF::String::fromUTF8(utf8name, length).isolatedCopy();
NativeExecutable* executable = vm.getHostFunction(NapiClass_ConstructorFunction, NapiClass_ConstructorFunction, name);
Structure* structure = globalObject->NapiClassStructure();
@@ -1098,8 +1189,11 @@ extern "C" napi_status napi_define_class(napi_env env,
{
Zig::GlobalObject* globalObject = toJS(env);
JSC::VM& vm = globalObject->vm();
NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, length, constructor, data, property_count, properties);
size_t len = length;
if (len == NAPI_AUTO_LENGTH) {
len = strlen(utf8name);
}
NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, len, constructor, data, property_count, properties);
JSC::JSValue value = JSC::JSValue(napiClass);
if (data != nullptr) {
napiClass->dataPtr = data;

View File

@@ -692,6 +692,43 @@ pub const Encoder = struct {
return writeU8(input, len, to, to_len, .ascii);
}
// C ABI entry points used by the C++ `Buffer.byteLength` binding.
// Naming: Bun__encoding__byteLength<Storage>As<Encoding>, where <Storage> is
// the in-memory representation of the JS string (Latin1 = 8-bit elements,
// UTF16 = 16-bit elements) and <Encoding> is the requested Buffer encoding.
// `len` is the number of elements pointed to by `input`.

export fn Bun__encoding__byteLengthLatin1AsHex(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .hex);
}
export fn Bun__encoding__byteLengthLatin1AsASCII(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .ascii);
}
export fn Bun__encoding__byteLengthLatin1AsURLSafeBase64(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .base64url);
}
export fn Bun__encoding__byteLengthLatin1AsUTF16(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .utf16le);
}
export fn Bun__encoding__byteLengthLatin1AsUTF8(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .utf8);
}
export fn Bun__encoding__byteLengthLatin1AsBase64(input: [*]const u8, len: usize) usize {
    return byteLengthU8(input, len, .base64);
}
export fn Bun__encoding__byteLengthUTF16AsBase64(input: [*]const u16, len: usize) usize {
    return byteLengthU16(input, len, .base64);
}
export fn Bun__encoding__byteLengthUTF16AsHex(input: [*]const u16, len: usize) usize {
    return byteLengthU16(input, len, .hex);
}
export fn Bun__encoding__byteLengthUTF16AsURLSafeBase64(input: [*]const u16, len: usize) usize {
    return byteLengthU16(input, len, .base64url);
}
export fn Bun__encoding__byteLengthUTF16AsUTF16(input: [*]const u16, len: usize) usize {
    return byteLengthU16(input, len, .utf16le);
}
export fn Bun__encoding__byteLengthUTF16AsUTF8(input: [*]const u16, len: usize) usize {
    return byteLengthU16(input, len, .utf8);
}
export fn Bun__encoding__byteLengthUTF16AsASCII(input: [*]const u16, len: usize) usize {
    // The C++ caller hands over 16-bit string storage, so the pointer is typed
    // as u16 here (it was previously declared as u8). ASCII/Latin-1 output
    // uses one byte per input code unit, so the answer is `len` and the
    // characters never need to be read. This matches the value the previous
    // implementation produced.
    _ = input;
    return len;
}
export fn Bun__encoding__constructFromLatin1AsHex(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue {
var slice = constructFromU8(input, len, .hex);
return JSC.JSValue.createBuffer(globalObject, slice, VirtualMachine.vm.allocator);
@@ -919,15 +956,37 @@ pub const Encoder = struct {
}
}
/// Returns how many bytes a Buffer would need to hold the Latin-1 (8-bit)
/// string `input[0..len]` when it is interpreted using `encoding`.
///
/// - utf8: exact UTF-8 size of the Latin-1 text.
/// - latin1 / ascii / buffer: one byte per character.
/// - ucs2 / utf16le: two bytes per character (every Latin-1 character is a
///   single UTF-16 code unit).
/// - hex: two characters describe one byte, so `len / 2`.
/// - base64 / base64url: decoded size, computed the way Node.js does it
///   (strip up to two trailing '=' and take 3/4). The input is assumed to be
///   well-formed; it is not validated.
pub fn byteLengthU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) usize {
    if (len == 0)
        return 0;

    switch (comptime encoding) {
        .utf8 => {
            return strings.elementLengthLatin1IntoUTF8([]const u8, input[0..len]);
        },

        .latin1, .ascii, .buffer => {
            return len;
        },

        .ucs2, .utf16le => {
            // The input is Latin-1, not UTF-8: each character widens to
            // exactly one 2-byte UTF-16 code unit.
            return len * 2;
        },

        .hex => {
            // The string is hex text being decoded into bytes.
            return len / 2;
        },

        .base64, .base64url => {
            // The string is base64 text being decoded into bytes.
            var chars = len;
            if (input[chars - 1] == '=') chars -= 1;
            if (chars > 1 and input[chars - 1] == '=') chars -= 1;
            return (chars * 3) / 4;
        },
    }
}
pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
if (len == 0)
return 0;
// TODO: increase temporary buffer size for larger amounts of data
// defer {
// if (comptime encoding.isBinaryToText()) {}
// }
// if (comptime encoding.isBinaryToText()) {}
switch (comptime encoding) {
.utf8 => {
@@ -958,6 +1017,32 @@ pub const Encoder = struct {
}
}
/// Returns how many bytes a Buffer would need to hold the UTF-16 string
/// `input[0..len]` when it is interpreted using `encoding`.
///
/// - utf8: exact UTF-8 size, computed by scanning the string.
/// - latin1 / ascii: one byte per UTF-16 code unit.
/// - ucs2 / utf16le / buffer: two bytes per code unit.
/// - hex: two characters describe one byte, so `len / 2`.
/// - base64 / base64url: decoded size, computed the way Node.js does it
///   (strip up to two trailing '=' and take 3/4). The input is assumed to be
///   well-formed; it is not validated.
pub fn byteLengthU16(input: [*]const u16, len: usize, comptime encoding: JSC.Node.Encoding) usize {
    if (len == 0)
        return 0;

    switch (comptime encoding) {
        .utf8 => {
            return strings.elementLengthUTF16IntoUTF8([]const u16, input[0..len]);
        },

        .ascii, .latin1 => {
            // Single-byte encodings emit exactly one byte per code unit,
            // regardless of the code unit's value.
            return len;
        },

        .ucs2, .buffer, .utf16le => {
            return len * 2;
        },

        .hex => {
            // The string is hex text being decoded into bytes.
            return len / 2;
        },

        .base64, .base64url => {
            // The string is base64 text being decoded into bytes.
            var chars = len;
            if (input[chars - 1] == '=') chars -= 1;
            if (chars > 1 and input[chars - 1] == '=') chars -= 1;
            return (chars * 3) / 4;
        },
    }
}
pub fn constructFromU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) []u8 {
if (len == 0)
return &[_]u8{};
@@ -1105,6 +1190,19 @@ pub const Encoder = struct {
_ = Bun__encoding__writeLatin1AsASCII;
_ = Bun__encoding__writeUTF16AsASCII;
_ = Bun__encoding__byteLengthLatin1AsHex;
_ = Bun__encoding__byteLengthLatin1AsURLSafeBase64;
_ = Bun__encoding__byteLengthLatin1AsUTF16;
_ = Bun__encoding__byteLengthLatin1AsUTF8;
_ = Bun__encoding__byteLengthLatin1AsBase64;
_ = Bun__encoding__byteLengthUTF16AsBase64;
_ = Bun__encoding__byteLengthUTF16AsHex;
_ = Bun__encoding__byteLengthUTF16AsURLSafeBase64;
_ = Bun__encoding__byteLengthUTF16AsUTF16;
_ = Bun__encoding__byteLengthUTF16AsUTF8;
_ = Bun__encoding__byteLengthLatin1AsASCII;
_ = Bun__encoding__byteLengthUTF16AsASCII;
_ = Bun__encoding__toStringUTF16;
_ = Bun__encoding__toStringUTF8;
_ = Bun__encoding__toStringASCII;

View File

@@ -699,7 +699,7 @@ pub inline fn copyU8IntoU16(output_: []u16, input_: []const u8) void {
// https://zig.godbolt.org/z/9rTn1orcY
const group = if (Environment.isAarch64)
const group = comptime if (Environment.isAarch64)
// on ARM64, 128 seems to be the best choice judging by lines of ASM
128
else
@@ -945,12 +945,7 @@ pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, ut
const replacement = utf16Codepoint(Type, utf16_remaining);
utf16_remaining = utf16_remaining[replacement.len..];
const count: usize = switch (replacement.code_point) {
0...0x7F => 1,
(0x7F + 1)...0x7FF => 2,
(0x7FF + 1)...0xFFFF => 3,
else => 4,
};
const count: usize = replacement.utf8Width();
try list.ensureUnusedCapacity(i + count);
list.items.len += i;
@@ -1009,6 +1004,15 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
/// Result of decoding one code point from a UTF-16 sequence.
pub const UTF16Replacement = struct {
    /// The decoded code point; defaults to `unicode_replacement`.
    code_point: u32 = unicode_replacement,
    /// Number of input elements to advance past (callers slice their input
    /// by this amount after decoding).
    len: u3 = 0,

    /// Number of bytes `code_point` occupies when encoded as UTF-8 (1 to 4).
    pub inline fn utf8Width(replacement: UTF16Replacement) usize {
        const cp = replacement.code_point;
        if (cp <= 0x7F) return 1;
        if (cp <= 0x7FF) return 2;
        if (cp <= 0xFFFF) return 3;
        return 4;
    }
};
// This variation matches WebKit behavior.
@@ -1138,6 +1142,36 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
};
}
/// Returns the number of bytes `latin1_` occupies once converted to UTF-8:
/// one byte for every element below 0x80 and two bytes for every other
/// element.
pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize {
    var remaining = latin1_;
    var total: usize = 0;

    while (remaining.len > 0) {
        // Fast path: consume whole vector-sized chunks while every element in
        // the chunk is <= 127.
        while (remaining.len > ascii_vector_size) {
            const chunk: AsciiVector = remaining[0..ascii_vector_size].*;
            if (@reduce(.Max, chunk) > 127) {
                break;
            }

            total += ascii_vector_size;
            remaining = remaining[ascii_vector_size..];
        }

        // Scalar path: measure the run of elements below 0x80 at the front.
        var ascii_run: usize = 0;
        while (ascii_run < remaining.len and remaining[ascii_run] < 0x80) {
            ascii_run += 1;
        }
        total += ascii_run;
        remaining = remaining[ascii_run..];

        if (remaining.len == 0) break;

        // The next element is >= 0x80 and takes two bytes in UTF-8.
        total += 2;
        remaining = remaining[1..];
    }

    return total;
}
const JSC = @import("javascript_core");
pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
@@ -1161,6 +1195,28 @@ pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: t
};
}
/// Returns the number of UTF-16 code units needed to represent `latin1_`.
///
/// Every Latin-1 character (0x00-0xFF) maps to exactly one UTF-16 code unit,
/// so the result is the element count of the input no matter which element
/// type the slice uses. No scanning is required.
pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
    return latin1_.len;
}
test "copyLatin1IntoUTF8" {
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
var output = std.mem.zeroes([500]u8);
@@ -1213,13 +1269,7 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
const replacement = utf16Codepoint(Type, utf16_remaining);
const width: usize = switch (replacement.code_point) {
0...0x7F => 1,
(0x7F + 1)...0x7FF => 2,
(0x7FF + 1)...0xFFFF => 3,
else => 4,
};
const width: usize = replacement.utf8Width();
if (width > remaining.len) {
ended_on_non_ascii = width > 1;
break;
@@ -1243,6 +1293,42 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
};
}
/// Returns the number of bytes `utf16` occupies once converted to UTF-8.
///
/// ASCII runs count one byte per element; each non-ASCII position is decoded
/// with `utf16Codepoint` and contributes that code point's UTF-8 width.
pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
    var rest = utf16;
    var total: usize = 0;

    while (true) {
        // Offset of the next non-ASCII element; none left means the tail is
        // pure ASCII and is added after the loop.
        const ascii_len = firstNonASCII16(Type, rest) orelse break;
        total += ascii_len;
        rest = rest[ascii_len..];

        const decoded = utf16Codepoint(Type, rest);
        total += decoded.utf8Width();
        // Skip the elements the decoder consumed.
        rest = rest[decoded.len..];
    }

    return total + rest.len;
}
/// Counts the elements `utf8` would occupy as UTF-16.
///
/// ASCII runs count one element each. At every non-ASCII position the input
/// is passed to `utf16Codepoint`; the returned `len` is added to the count and
/// the input is advanced by the code point's UTF-8 width (clamped to what is
/// left).
///
/// NOTE(review): `utf16Codepoint` is handed UTF-8 bytes here. Judging by its
/// name it decodes UTF-16 input -- confirm this produces the intended result
/// for multi-byte UTF-8 sequences.
pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize {
    var rest = utf8;
    var total: usize = 0;

    while (true) {
        const ascii_len = firstNonASCII(rest) orelse break;
        total += ascii_len;
        rest = rest[ascii_len..];

        const decoded = utf16Codepoint(Type, rest);
        total += decoded.len;

        // Never step past the end of the input.
        const advance = @minimum(decoded.utf8Width(), rest.len);
        rest = rest[advance..];
    }

    return total + rest.len;
}
// Check utf16 string equals utf8 string without allocating extra memory
pub fn utf16EqlString(text: []const u16, str: string) bool {
if (text.len > str.len) {
@@ -1462,6 +1548,10 @@ pub inline fn u16Len(supplementary: anytype) u2 {
}
/// Convenience wrapper around `firstNonASCIIWithType` for plain byte slices.
/// Returns whatever that function returns for `slice`; callers in this file
/// use a non-null result as an offset into `slice`.
pub fn firstNonASCII(slice: []const u8) ?u32 {
    const index = firstNonASCIIWithType([]const u8, slice);
    return index;
}
pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
var remaining = slice;
if (comptime Environment.isAarch64 or Environment.isX64) {