Compare commits

...

1 Commits

Author SHA1 Message Date
Jarred Sumner
84a9fac315 experiment: add a direct SIMDJSON utf8 version
benchmark                                   time (avg)             (min … max)       p75       p99      p995
------------------------------------------------------------------------------ -----------------------------
• small object
------------------------------------------------------------------------------ -----------------------------
JSON.parse                              656.38 ns/iter (621.56 ns … 788.83 ns) 665.85 ns 788.83 ns 788.83 ns
JSON.parse (SIMDJSON on-demand buffer)  743.61 ns/iter  (720.6 ns … 833.83 ns) 745.12 ns 833.83 ns 833.83 ns

summary for small object
  JSON.parse
   1.13x faster than JSON.parse (SIMDJSON on-demand buffer)

• Array(4096) of true
------------------------------------------------------------------------------ -----------------------------
JSON.parse                               45.42 µs/iter    (42.79 µs … 1.91 ms)  44.79 µs  52.71 µs  56.54 µs
JSON.parse (SIMDJSON on-demand buffer)   38.65 µs/iter    (35.33 µs … 1.44 ms)  38.58 µs  45.38 µs  50.17 µs

summary for Array(4096) of true
  JSON.parse (SIMDJSON on-demand buffer)
   1.18x faster than JSON.parse

• Array(4096) of 1234.567
------------------------------------------------------------------------------ -----------------------------
JSON.parse                              100.79 µs/iter  (96.42 µs … 962.79 µs) 100.08 µs  111.5 µs 115.38 µs
JSON.parse (SIMDJSON on-demand buffer)   62.12 µs/iter  (58.13 µs … 751.96 µs)  62.75 µs  71.21 µs  75.96 µs

summary for Array(4096) of 1234.567
  JSON.parse (SIMDJSON on-demand buffer)
   1.62x faster than JSON.parse

• Array(4096) of 'hello'
------------------------------------------------------------------------------ -----------------------------
JSON.parse                              142.44 µs/iter   (132.75 µs … 1.38 ms) 141.33 µs 159.42 µs 169.54 µs
JSON.parse (SIMDJSON on-demand buffer)  196.67 µs/iter    (130.54 µs … 1.9 ms)  203.5 µs  234.5 µs 407.46 µs

summary for Array(4096) of 'hello'
  JSON.parse
   1.38x faster than JSON.parse (SIMDJSON on-demand buffer)

• Array(4096) of 'hello'.repeat(1024)
------------------------------------------------------------------------------ -----------------------------
JSON.parse                                 9.8 ms/iter    (9.07 ms … 11.26 ms)  10.19 ms  11.26 ms  11.26 ms
JSON.parse (SIMDJSON on-demand buffer)    6.39 ms/iter         (5.9 ms … 9 ms)   6.74 ms      9 ms      9 ms

summary for Array(4096) of 'hello'.repeat(1024)
  JSON.parse (SIMDJSON on-demand buffer)
   1.53x faster than JSON.parse

• Array(4096) of {a: 123, b: 456}
------------------------------------------------------------------------------ -----------------------------
JSON.parse                              310.68 µs/iter   (297.96 µs … 1.14 ms) 308.25 µs 386.33 µs 752.25 µs
JSON.parse (SIMDJSON on-demand buffer)  413.16 µs/iter   (398.67 µs … 1.13 ms) 411.88 µs 474.38 µs 717.29 µs

summary for Array(4096) of {a: 123, b: 456}
  JSON.parse
   1.33x faster than JSON.parse (SIMDJSON on-demand buffer)

Benchmark:

```js
import { bench, group, run } from "mitata";

/**
 * Registers the two competing benchmarks for a single payload.
 *
 * The payload is serialized once up front; the string form feeds the
 * built-in `JSON.parse`, and a `Buffer` holding the same text feeds the
 * experimental `Buffer.prototype.json()`.
 *
 * @param {*} obj  Any JSON-serializable value to benchmark with.
 */
function load(obj) {
  const json = JSON.stringify(obj);
  const bytes = Buffer.from(json);

  // Baseline: the engine's own parser, working from a string.
  bench("JSON.parse", () => JSON.parse(json));

  // Candidate: parse directly from the buffer's bytes.
  bench("JSON.parse (SIMDJSON on-demand buffer)", () => bytes.json());
}

// A small object mixing numbers, null, booleans, a string, an array and
// nested (including empty) objects.
group("small object", () => {
  const sample = {
    a: 1,
    b: 2,
    c: null,
    false: false,
    true: true,
    null: null,
    foo: "bar",
    arr: [1, 2, 3],
    h: {
      a: 1,
    },
    i: {
      a: 1,
    },
    j: {},
    // 100 more keys
    k: {},
  };
  load(sample);
});

// Each of the following groups benchmarks a 4096-element array whose slots
// all hold the same value.
group("Array(4096) of true", () => {
  load(new Array(4096).fill(true));
});

group("Array(4096) of 1234.567", () => {
  load(new Array(4096).fill(1234.567));
});

group("Array(4096) of 'hello'", () => {
  load(new Array(4096).fill("hello"));
});

group("Array(4096) of 'hello'.repeat(1024)", () => {
  load(new Array(4096).fill("hello".repeat(1024)));
});

group("Array(4096) of {a: 123, b: 456}", () => {
  load(new Array(4096).fill({ a: 123, b: 456 }));
});

// Execute every registered group.
run();
```
2023-04-05 23:54:56 -07:00
3 changed files with 48874 additions and 1 deletions

View File

@@ -56,6 +56,46 @@
// #include "JavaScriptCore/JSTypedArrayViewPrototype.h"
#include "JavaScriptCore/JSArrayBufferViewInlines.h"
#include "simdjson.h"
#include <JavaScriptCore/JSONAtomStringCache.h>
#include "simdutf.h"
#include "JavaScriptCore/SmallStrings.h"
#include "JavaScriptCore/VM.h"
namespace JSC {
// Returns an atomized string for `characters[0..length)`, consulting the
// per-VM JSON atom-string cache before falling back to AtomStringImpl::add.
//
// `type` is forwarded to cacheSlot() together with the first character, last
// character and length to select the cache slot.
template<typename CharacterType>
ALWAYS_INLINE Ref<AtomStringImpl> JSONAtomStringCache::make(Type type, const CharacterType* characters, unsigned length)
{
// Empty input maps to the shared empty StringImpl.
if (!length)
return *static_cast<AtomStringImpl*>(StringImpl::empty());
auto firstCharacter = characters[0];
if (length == 1) {
// Single characters up to maxSingleCharacterString come from the VM's
// small-strings table. Larger single characters fall through to the
// cache lookup below.
if (firstCharacter <= maxSingleCharacterString)
return vm().smallStrings.singleCharacterStringRep(firstCharacter);
} else if (length > maxStringLengthForCache)
// Strings longer than maxStringLengthForCache bypass the cache entirely.
return AtomStringImpl::add(characters, length).releaseNonNull();
auto lastCharacter = characters[length - 1];
auto& slot = cacheSlot(type, firstCharacter, lastCharacter, length);
// On a mismatch, atomize the string and overwrite the slot with the result.
if (!equal(slot.get(), characters, length)) {
auto result = AtomStringImpl::add(characters, length);
slot = result;
return result.releaseNonNull();
}
// The slot already holds an equal string: reuse it.
return *slot;
}
// Recovers the owning VM by subtracting the offset of the
// `jsonAtomStringCache` member within VM from this object's address.
ALWAYS_INLINE VM& JSONAtomStringCache::vm() const
{
return *bitwise_cast<VM*>(bitwise_cast<uintptr_t>(this) - OBJECT_OFFSETOF(VM, jsonAtomStringCache));
}
} // namespace JSC
JSC_DECLARE_HOST_FUNCTION(constructJSBuffer);
static JSC_DECLARE_HOST_FUNCTION(jsBufferConstructorFunction_alloc);
@@ -82,6 +122,406 @@ static JSC_DECLARE_HOST_FUNCTION(jsBufferPrototypeFunction_swap64);
static JSC_DECLARE_HOST_FUNCTION(jsBufferPrototypeFunction_toString);
static JSC_DECLARE_HOST_FUNCTION(jsBufferPrototypeFunction_write);
using namespace simdjson;
/// Recursively converts a simdjson on-demand value into a JavaScript value.
///
/// @param globalObject  Global object used to create arrays, objects and strings.
/// @param stop          Set to true when conversion fails; it is never reset to
///                      false here. The caller reports the error.
/// @param element       On-demand value to convert.
/// @return The converted value, or `undefined` whenever `stop` has been set.
static JSValue recursiveParseJSON(JSC::JSGlobalObject* globalObject, bool& stop, simdjson::ondemand::value element)
{
    JSC::VM& vm = globalObject->vm();

    simdjson::ondemand::json_type type;
    if (UNLIKELY(element.type().get(type))) {
        stop = true;
        return JSC::jsUndefined();
    }

    switch (type) {
    case simdjson::ondemand::json_type::array: {
        JSC::JSArray* array = JSC::JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(JSC::ArrayWithUndecided));
        // tryCreate() can hand back nullptr when the allocation fails; bail
        // out instead of dereferencing it.
        if (UNLIKELY(!array)) {
            stop = true;
            return JSC::jsUndefined();
        }

        for (auto child : element.get_array()) {
            simdjson::ondemand::value current;
            if (UNLIKELY(child.get(current))) {
                stop = true;
                return JSC::jsUndefined();
            }

            JSValue item = recursiveParseJSON(globalObject, stop, current);
            if (UNLIKELY(stop)) {
                return JSC::jsUndefined();
            }
            array->push(globalObject, item);
        }

        return array;
    }

    case simdjson::ondemand::json_type::object: {
        JSC::JSObject* object = JSC::constructEmptyObject(globalObject);

        simdjson::ondemand::object elementObject;
        if (UNLIKELY(element.get_object().get(elementObject))) {
            stop = true;
            return JSC::jsUndefined();
        }

        for (auto field : elementObject) {
            simdjson::ondemand::value current;
            std::string_view keyView;

            if (UNLIKELY(field.unescaped_key(true).get(keyView))) {
                stop = true;
                return JSC::jsUndefined();
            }

            if (UNLIKELY(field.value().get(current))) {
                stop = true;
                return JSC::jsUndefined();
            }

            // ASCII-only keys go through the VM's JSON atom-string cache;
            // anything else is decoded from UTF-8.
            JSC::Identifier identifier;
            if (simdutf::validate_ascii(keyView.data(), keyView.length())) {
                identifier = JSC::Identifier::fromString(vm, vm.jsonAtomStringCache.makeIdentifier(keyView.data(), keyView.length()));
            } else {
                identifier = JSC::Identifier::fromString(vm, WTF::String::fromUTF8(keyView.data(), keyView.length()));
            }

            JSValue fieldValue = recursiveParseJSON(globalObject, stop, current);
            if (UNLIKELY(stop)) {
                return JSC::jsUndefined();
            }

            // Keys such as "0" are array indices; putDirect() must not be
            // used for those, so route through putDirectMayBeIndex().
            object->putDirectMayBeIndex(globalObject, identifier, fieldValue);
        }

        return object;
    }

    case simdjson::ondemand::json_type::number: {
        double val;
        if (UNLIKELY(element.get(val))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return jsNumber(val);
    }

    case simdjson::ondemand::json_type::string: {
        std::string_view str;
        if (UNLIKELY(element.get(str))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return JSC::jsString(vm, WTF::String::fromUTF8(str.data(), str.length()));
    }

    case simdjson::ondemand::json_type::boolean: {
        bool val;
        if (UNLIKELY(element.get(val))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return jsBoolean(val);
    }

    case simdjson::ondemand::json_type::null: {
        bool val;
        if (UNLIKELY(element.is_null().get(val) || !val)) {
            stop = true;
            return JSC::jsUndefined();
        }

        return JSC::jsNull();
    }
    }

    // A json_type outside the cases above: report failure rather than
    // flowing off the end of a value-returning function.
    stop = true;
    return JSC::jsUndefined();
}
/// Recursively converts a simdjson DOM element into a JavaScript value.
///
/// @param globalObject  Global object used to create arrays, objects and strings.
/// @param stop          Set to true when conversion fails; it is never reset to
///                      false here. The caller reports the error.
/// @param element       DOM element to convert.
/// @return The converted value, or `undefined` whenever `stop` has been set.
static JSValue recursiveParseJSONDOM(JSC::JSGlobalObject* globalObject, bool& stop, simdjson::dom::element element)
{
    JSC::VM& vm = globalObject->vm();

    switch (element.type()) {
    case simdjson::dom::element_type::ARRAY: {
        JSC::JSArray* array = JSC::JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(JSC::ArrayWithUndecided));
        // tryCreate() can hand back nullptr when the allocation fails; bail
        // out instead of dereferencing it.
        if (UNLIKELY(!array)) {
            stop = true;
            return JSC::jsUndefined();
        }

        simdjson::dom::array elementArray;
        if (UNLIKELY(element.get_array().get(elementArray))) {
            stop = true;
            return JSC::jsUndefined();
        }

        for (auto child : elementArray) {
            simdjson::dom::element current;
            if (UNLIKELY(child.get(current))) {
                stop = true;
                return JSC::jsUndefined();
            }

            JSValue item = recursiveParseJSONDOM(globalObject, stop, current);
            if (UNLIKELY(stop)) {
                return JSC::jsUndefined();
            }
            array->push(globalObject, item);
        }

        return array;
    }

    case simdjson::dom::element_type::OBJECT: {
        JSC::JSObject* object = JSC::constructEmptyObject(globalObject);

        simdjson::dom::object elementObject;
        if (UNLIKELY(element.get_object().get(elementObject))) {
            stop = true;
            return JSC::jsUndefined();
        }

        for (auto field : elementObject) {
            simdjson::dom::element current = field.value;
            std::string_view keyView = field.key;

            JSC::Identifier identifier = JSC::Identifier::fromString(vm, WTF::String::fromUTF8(keyView.data(), keyView.length()));

            JSValue fieldValue = recursiveParseJSONDOM(globalObject, stop, current);
            if (UNLIKELY(stop)) {
                return JSC::jsUndefined();
            }

            // Keys such as "0" are array indices; putDirect() must not be
            // used for those, so route through putDirectMayBeIndex().
            object->putDirectMayBeIndex(globalObject, identifier, fieldValue);
        }

        return object;
    }

    case simdjson::dom::element_type::INT64:
    case simdjson::dom::element_type::UINT64:
    case simdjson::dom::element_type::DOUBLE: {
        double val;
        if (UNLIKELY(element.get(val))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return jsNumber(val);
    }

    case simdjson::dom::element_type::STRING: {
        std::string_view str;
        if (UNLIKELY(element.get(str))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return JSC::jsString(vm, WTF::String::fromUTF8(str.data(), str.length()));
    }

    case simdjson::dom::element_type::BOOL: {
        bool val;
        if (UNLIKELY(element.get(val))) {
            stop = true;
            return JSC::jsUndefined();
        }

        return jsBoolean(val);
    }

    case simdjson::dom::element_type::NULL_VALUE: {
        // The element type has already been established as null. Reading it
        // as a bool (as the previous code did) reports a type error, which
        // made every JSON `null` fail to parse.
        return JSC::jsNull();
    }
    }

    // An element_type outside the cases above: report failure rather than
    // flowing off the end of a value-returning function.
    stop = true;
    return JSC::jsUndefined();
}
/// Parses a padded UTF-8 JSON buffer with simdjson's on-demand API and
/// returns the resulting JavaScript value.
///
/// @param globalObject     Global object used for allocation and error reporting.
/// @param ptr              Start of the JSON text.
/// @param byteLength       Number of JSON bytes at `ptr`.
/// @param allocatedLength  Total readable bytes at `ptr`, forwarded to
///                         `parser.iterate()` as the buffer capacity.
/// @return The parsed value. On failure a SyntaxError("Invalid JSON") is
///         thrown on `globalObject` and `null` is returned.
static JSValue parseJSONOnDemand(JSC::JSGlobalObject* globalObject, const char* ptr, size_t byteLength, size_t allocatedLength)
{
    JSC::VM& vm = JSC::getVM(globalObject);
    auto throwScope = DECLARE_THROW_SCOPE(vm);

    // Every failure path reports the same error and returns null.
    auto fail = [&]() -> JSValue {
        throwException(globalObject, throwScope, JSC::createSyntaxError(globalObject, "Invalid JSON"_s));
        return JSC::jsNull();
    };

    simdjson::ondemand::document document;
    // NOTE(review): this parser is a function-local static, i.e. one instance
    // shared by every caller — confirm this is never reached from more than
    // one thread at a time.
    static simdjson::ondemand::parser parser = simdjson::ondemand::parser();

    auto err = parser.iterate(ptr, byteLength, allocatedLength).get(document);
    if (err != simdjson::SUCCESS)
        return fail();

    simdjson::ondemand::value value;
    err = document.get_value().get(value);

    // A document whose root is a scalar cannot be read as a `value`; read it
    // straight from the document instead.
    if (err == simdjson::error_code::SCALAR_DOCUMENT_AS_VALUE) {
        simdjson::ondemand::json_type type;
        if (!document.type().get(type)) {
            switch (type) {
            case simdjson::ondemand::json_type::number: {
                double val;
                if (UNLIKELY(document.get_double().get(val)))
                    return fail();
                return jsNumber(val);
            }
            case simdjson::ondemand::json_type::string: {
                std::string_view str;
                if (UNLIKELY(document.get_string().get(str)))
                    return fail();
                return JSC::jsString(vm, WTF::String::fromUTF8(str.data(), str.length()));
            }
            case simdjson::ondemand::json_type::boolean: {
                bool val;
                if (UNLIKELY(document.get_bool().get(val)))
                    return fail();
                return jsBoolean(val);
            }
            case simdjson::ondemand::json_type::null: {
                bool val;
                if (UNLIKELY(document.is_null().get(val) || !val))
                    return fail();
                return JSC::jsNull();
            }
            default:
                break;
            }
        }
    }

    // Reaching this point with an error means `value` was never filled in
    // (get_value() failed, or the scalar fallback above could not handle the
    // document). Walking it would operate on a default-constructed value, so
    // reject the input here.
    if (err != simdjson::SUCCESS)
        return fail();

    bool stop = false;
    auto result = recursiveParseJSON(globalObject, stop, value);
    if (stop)
        return fail();

    return result;
}
/// Parses a JSON buffer with simdjson's DOM API and converts the resulting
/// tree into a JavaScript value.
///
/// @param globalObject     Global object used for allocation and error reporting.
/// @param ptr              Start of the JSON text.
/// @param byteLength       Number of JSON bytes at `ptr`.
/// @param allocatedLength  Forwarded as the third argument of `dom::parser::parse`.
///                         NOTE(review): in the simdjson DOM API that argument
///                         looks like a `realloc_if_needed` flag rather than a
///                         capacity — confirm.
/// @return The parsed value. On failure a SyntaxError("Invalid JSON") is
///         thrown on `globalObject` and `null` is returned.
///
/// A commented-out scalar-document fallback written against the on-demand
/// API used to live in this function; it was never active here.
static JSValue parseJSON(JSC::JSGlobalObject* globalObject, const char* ptr, size_t byteLength, size_t allocatedLength)
{
    JSC::VM& vm = JSC::getVM(globalObject);
    auto throwScope = DECLARE_THROW_SCOPE(vm);

    dom::parser parser;
    dom::element root;
    const auto parseError = parser.parse(ptr, byteLength, allocatedLength).get(root);
    if (parseError) {
        throwException(globalObject, throwScope, JSC::createSyntaxError(globalObject, "Invalid JSON"_s));
        return JSC::jsNull();
    }

    // Convert the DOM tree; `failed` is raised by the walker on any error.
    bool failed = false;
    auto converted = recursiveParseJSONDOM(globalObject, failed, root);
    if (!failed)
        return converted;

    throwException(globalObject, throwScope, JSC::createSyntaxError(globalObject, "Invalid JSON"_s));
    return JSC::jsNull();
}
/// Copies `data` into a temporary buffer that carries the trailing
/// SIMDJSON_PADDING bytes simdjson requires, then parses that copy.
///
/// @param globalObject  Global object used for allocation and error reporting.
/// @param data          JSON bytes; only read, never modified.
/// @param byteLength    Number of bytes at `data`.
/// @return The parsed value; `null` for an empty input. If the temporary
///         buffer cannot be allocated, an out-of-memory error is thrown on
///         `globalObject` and `null` is returned.
static JSValue parseJSONWithInsufficientPadding(JSC::JSGlobalObject* globalObject, const void* data, size_t byteLength)
{
    if (!byteLength)
        return JSC::jsNull();

    const size_t paddedLength = byteLength + simdjson::SIMDJSON_PADDING;
    char* padded = static_cast<char*>(malloc(paddedLength));
    if (!padded) {
        JSC::VM& vm = JSC::getVM(globalObject);
        auto throwScope = DECLARE_THROW_SCOPE(vm);
        throwOutOfMemoryError(globalObject, throwScope);
        return JSC::jsNull();
    }

    memcpy(padded, data, byteLength);
    // The parser may read into the padding region; zero it so those reads
    // never see uninitialized heap memory.
    memset(padded + byteLength, 0, simdjson::SIMDJSON_PADDING);

    JSValue result = parseJSONOnDemand(globalObject, padded, byteLength, paddedLength);
    // The result no longer refers to the temporary buffer: strings were
    // copied into JS values during conversion.
    free(padded);
    return result;
}
/// Host function behind the `json` entry of the Buffer prototype table:
/// parses the receiver's bytes as JSON.
///
/// Returns `undefined` when an exception is already pending, when `this` is
/// not an object inheriting from JSUint8Array, or when parsing threw;
/// otherwise returns the parsed value.
JSC_DEFINE_HOST_FUNCTION(jsBufferPrototypeJSONParseFunction, (JSC::JSGlobalObject * globalObject, JSC::CallFrame* callframe))
{
    JSC::VM& vm = JSC::getVM(globalObject);
    auto throwScope = DECLARE_THROW_SCOPE(vm);

    auto receiver = callframe->thisValue();
    if (UNLIKELY(throwScope.exception()))
        return JSValue::encode(JSC::jsUndefined());
    if (!receiver.isObject())
        return JSValue::encode(JSC::jsUndefined());

    auto* receiverObject = receiver.getObject();
    if (!receiverObject)
        return JSValue::encode(JSC::jsUndefined());
    if (!receiver.inherits<JSUint8Array>())
        return JSValue::encode(JSC::jsUndefined());

    auto* view = jsCast<JSUint8Array*>(receiverObject);
    // Keep the typed array alive for as long as its storage is being read.
    JSC::EnsureStillAliveScope keepAlive(view);

    auto parsed = parseJSONWithInsufficientPadding(globalObject, view->vector(), view->byteLength());
    RETURN_IF_EXCEPTION(throwScope, JSValue::encode(JSC::jsUndefined()));
    return JSValue::encode(parsed);
}
static JSUint8Array* allocBuffer(JSC::JSGlobalObject* lexicalGlobalObject, size_t byteLength)
{
JSC::VM& vm = JSC::getVM(lexicalGlobalObject);
@@ -1902,6 +2342,8 @@ static const HashTableValue JSBufferPrototypeTableValues[]
{ "writeUint32BE"_s, static_cast<unsigned>(JSC::PropertyAttribute::Builtin), NoIntrinsic, { HashTableValue::BuiltinGeneratorType, jsBufferPrototypeWriteUInt32BECodeGenerator, 1 } },
{ "writeUint32LE"_s, static_cast<unsigned>(JSC::PropertyAttribute::Builtin), NoIntrinsic, { HashTableValue::BuiltinGeneratorType, jsBufferPrototypeWriteUInt32LECodeGenerator, 1 } },
{ "writeUint8"_s, static_cast<unsigned>(JSC::PropertyAttribute::Builtin), NoIntrinsic, { HashTableValue::BuiltinGeneratorType, jsBufferPrototypeWriteUInt8CodeGenerator, 1 } },
{ "json"_s, static_cast<unsigned>(JSC::PropertyAttribute::Function), NoIntrinsic, { HashTableValue::NativeFunctionType, jsBufferPrototypeJSONParseFunction, 0 } },
};
void JSBufferPrototype::finishCreation(VM& vm, JSC::JSGlobalObject* globalThis)
@@ -2173,4 +2615,4 @@ bool JSBuffer__isBuffer(JSC::JSGlobalObject* lexicalGlobalObject, JSC::EncodedJS
JSValue prototype = cell->getPrototype(vm, lexicalGlobalObject);
return prototype.inherits<JSBufferPrototype>();
}
}

File diff suppressed because it is too large Load Diff

31846
src/bun.js/bindings/simdjson.h Normal file

File diff suppressed because it is too large Load Diff