mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
Faster UTF16 -> UTF8 and UTF8 -> UTF16 (#1552)
* Fix freezing test * Add SIMDUTF * More micro bench snippets * Update .gitattributes * Update .gitattributes Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
This commit is contained in:
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -6,3 +6,5 @@ src/bun.js/bindings/sqlite/sqlite3.c linguist-vendored
|
||||
src/bun.js/bindings/sqlite/sqlite3_local.h linguist-vendored
|
||||
*.lockb binary diff=lockb
|
||||
*.zig text eol=lf
|
||||
src/bun.js/bindings/simdutf.cpp linguist-vendored
|
||||
src/bun.js/bindings/simdutf.h linguist-vendored
|
||||
|
||||
30
bench/snippets/arraybuffersink.mjs
Normal file
30
bench/snippets/arraybuffersink.mjs
Normal file
@@ -0,0 +1,30 @@
|
||||
import { ArrayBufferSink } from "bun";
|
||||
import { bench, run } from "mitata";
|
||||
|
||||
// Benchmark inputs: an ASCII-only string and one containing emoji,
// each in a short form and a form repeated 1024 times.
const short = "Hello World!";
const shortUTF16 = "Hello World 💕💕💕";
const long = "Hello World!".repeat(1024);
const longUTF16 = "Hello World 💕💕💕".repeat(1024);

// A single sink instance is shared by every benchmark below.
const encoder = new ArrayBufferSink({ stream: true, highWaterMark: 512 });

// Each case writes one string and then calls start().
// NOTE(review): start() presumably resets the sink so its buffer does not
// grow across iterations — confirm against Bun's ArrayBufferSink docs.
bench(`${short.length} ascii`, () => {
  encoder.write(short);
  encoder.start();
});

// The label reports the length of the string that is actually written
// (previously it reported short.length).
bench(`${shortUTF16.length} utf8`, () => {
  encoder.write(shortUTF16);
  encoder.start();
});

bench(`${long.length} ascii`, () => {
  encoder.write(long);
  encoder.start();
});

bench(`${longUTF16.length} utf8`, () => {
  encoder.write(longUTF16);
  encoder.start();
});

await run();
|
||||
45
bench/snippets/read-file.mjs
Normal file
45
bench/snippets/read-file.mjs
Normal file
@@ -0,0 +1,45 @@
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import { bench, run } from "mitata";
|
||||
|
||||
/**
 * Writes `text` as UTF-8 to `/tmp/bun-bench-<name>.text`.
 *
 * @param {string} name - Fixture name used in the file name.
 * @param {string} text - Contents to write.
 * @returns {{ path: string, length: number }} The file path and `text.length`
 *   (UTF-16 code units, not bytes on disk).
 */
function writeFixture(name, text) {
  const path = `/tmp/bun-bench-${name}.text`;
  writeFileSync(path, text, "utf8");
  return { path, length: text.length };
}

// Fixtures are written once at module load; the benchmarks only read them.
const short = writeFixture("short", "Hello World!");
const shortUTF16 = writeFixture("shortUTF16", "Hello World 💕💕💕");
const long = writeFixture("long", "Hello World!".repeat(1024));
const longUTF16 = writeFixture("longUTF16", "Hello World 💕💕💕".repeat(1024));

bench(`${short.length} ascii`, () => {
  readFileSync(short.path, "utf-8");
});

// The label reports the length of the fixture that is actually read
// (previously it reported short.length).
bench(`${shortUTF16.length} utf8`, () => {
  readFileSync(shortUTF16.path, "utf-8");
});

bench(`${long.length} ascii`, () => {
  readFileSync(long.path, "utf-8");
});

bench(`${longUTF16.length} utf8`, () => {
  readFileSync(longUTF16.path, "utf-8");
});

await run();
|
||||
49
bench/snippets/text-decoder.mjs
Normal file
49
bench/snippets/text-decoder.mjs
Normal file
@@ -0,0 +1,49 @@
|
||||
import { bench, run } from "../node_modules/mitata/src/cli.mjs";
|
||||
|
||||
// Benchmark inputs are UTF-8 byte arrays, so `.length` below is a byte count.
const short = new TextEncoder().encode("Hello World!");
const shortUTF16 = new TextEncoder().encode("Hello World 💕💕💕");
const long = new TextEncoder().encode("Hello World!".repeat(1024));
const longUTF16 = new TextEncoder().encode("Hello World 💕💕💕".repeat(1024));

// Every case constructs a fresh TextDecoder inside the measured callback,
// so decoder construction is part of what is timed.
bench(`${short.length} ascii`, () => {
  const decoder = new TextDecoder();
  decoder.decode(short);
});

// The label reports the length of the bytes that are actually decoded
// (previously it reported short.length).
bench(`${shortUTF16.length} utf8`, () => {
  const decoder = new TextDecoder();
  decoder.decode(shortUTF16);
});

bench(`${long.length} ascii`, () => {
  const decoder = new TextDecoder();
  decoder.decode(long);
});

bench(`${longUTF16.length} utf8`, () => {
  const decoder = new TextDecoder();
  decoder.decode(longUTF16);
});

// Buffer#toString comparisons, only registered when a global Buffer exists.
if ("Buffer" in globalThis) {
  const buffer_short = Buffer.from(short);
  bench(`Buffer ${buffer_short.length} ascii`, () => {
    buffer_short.toString("ascii");
  });

  // Built from shortUTF16 (previously from `short`, which made this case
  // decode ASCII-only bytes under a "utf8" label).
  const buffer_shortUTF16 = Buffer.from(shortUTF16);
  bench(`Buffer ${buffer_shortUTF16.length} utf8`, () => {
    buffer_shortUTF16.toString("utf8");
  });

  const buffer_long = Buffer.from(long);
  bench(`Buffer ${buffer_long.length} ascii`, () => {
    buffer_long.toString("ascii");
  });

  const buffer_longUTF16 = Buffer.from(longUTF16);
  bench(`Buffer ${buffer_longUTF16.length} utf8`, () => {
    buffer_longUTF16.toString("utf8");
  });
}

await run();
|
||||
33
bench/snippets/text-encoder.mjs
Normal file
33
bench/snippets/text-encoder.mjs
Normal file
@@ -0,0 +1,33 @@
|
||||
import { bench, run } from "mitata";
|
||||
|
||||
// Benchmark inputs: an ASCII-only string and one containing emoji,
// each in a short form and a form repeated 1024 times.
const short = "Hello World!";
const shortUTF16 = "Hello World 💕💕💕";
const long = "Hello World!".repeat(1024);
const longUTF16 = "Hello World 💕💕💕".repeat(1024);

// One TextEncoder is reused by every benchmark.
const encoder = new TextEncoder();

// Both literals below have a `.length` of 4 UTF-16 code units.
bench(`4 ascii`, () => {
  encoder.encode("heyo");
});

bench(`4 utf8`, () => {
  encoder.encode("💕💕");
});

bench(`${short.length} ascii`, () => {
  encoder.encode(short);
});

// The label reports the length of the string that is actually encoded
// (previously it reported short.length).
bench(`${shortUTF16.length} utf8`, () => {
  encoder.encode(shortUTF16);
});

bench(`${long.length} ascii`, () => {
  encoder.encode(long);
});

bench(`${longUTF16.length} utf8`, () => {
  encoder.encode(longUTF16);
});

await run();
|
||||
25
bench/snippets/write-file.mjs
Normal file
25
bench/snippets/write-file.mjs
Normal file
@@ -0,0 +1,25 @@
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import { bench, run } from "mitata";
|
||||
|
||||
// Benchmark inputs: an ASCII-only string and one containing emoji,
// each in a short form and a form repeated 1024 times.
const short = "Hello World!";
const shortUTF16 = "Hello World 💕💕💕";
const long = "Hello World!".repeat(1024);
const longUTF16 = "Hello World 💕💕💕".repeat(1024);

// Every case overwrites the same output file.
bench(`${short.length} ascii`, () => {
  writeFileSync("/tmp/bun.bench-out.txt", short);
});

// The label reports the length of the string that is actually written
// (previously it reported short.length).
bench(`${shortUTF16.length} utf8`, () => {
  writeFileSync("/tmp/bun.bench-out.txt", shortUTF16);
});

bench(`${long.length} ascii`, () => {
  writeFileSync("/tmp/bun.bench-out.txt", long);
});

bench(`${longUTF16.length} utf8`, () => {
  writeFileSync("/tmp/bun.bench-out.txt", longUTF16);
});

await run();
|
||||
327
src/bun.js/bindings/bun-simdutf.cpp
Normal file
327
src/bun.js/bindings/bun-simdutf.cpp
Normal file
@@ -0,0 +1,327 @@
|
||||
#include "simdutf.h"
|
||||
|
||||
typedef struct SIMDUTFResult {
|
||||
int error;
|
||||
size_t count;
|
||||
} SIMDUTFResult;
|
||||
|
||||
extern "C" {
|
||||
|
||||
int simdutf__detect_encodings(const char* input, size_t length)
|
||||
{
|
||||
return simdutf::detect_encodings(input, length);
|
||||
}
|
||||
|
||||
bool simdutf__validate_utf8(const char* buf, size_t len)
|
||||
{
|
||||
return simdutf::validate_utf8(buf, len);
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__validate_utf8_with_errors(const char* buf, size_t len)
|
||||
{
|
||||
auto res = simdutf::validate_utf8_with_errors(buf, len);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
bool simdutf__validate_ascii(const char* buf, size_t len)
|
||||
{
|
||||
return simdutf::validate_ascii(buf, len);
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__validate_ascii_with_errors(const char* buf, size_t len)
|
||||
{
|
||||
auto res = simdutf::validate_ascii_with_errors(buf, len);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
bool simdutf__validate_utf16le(const char16_t* buf, size_t len)
|
||||
{
|
||||
return simdutf::validate_utf16le(buf, len);
|
||||
}
|
||||
|
||||
bool simdutf__validate_utf16be(const char16_t* buf, size_t len)
|
||||
{
|
||||
return simdutf::validate_utf16be(buf, len);
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__validate_utf16le_with_errors(const char16_t* buf,
|
||||
size_t len)
|
||||
{
|
||||
auto res = simdutf::validate_utf16le_with_errors(buf, len);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__validate_utf16be_with_errors(const char16_t* buf,
|
||||
size_t len)
|
||||
{
|
||||
auto res = simdutf::validate_utf16be_with_errors(buf, len);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
bool simdutf__validate_utf32(const char32_t* buf, size_t len)
|
||||
{
|
||||
return simdutf::validate_utf32(buf, len);
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__validate_utf32_with_errors(const char32_t* buf,
|
||||
size_t len)
|
||||
{
|
||||
auto res = simdutf::validate_utf32_with_errors(buf, len);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf8_to_utf16le(const char* buf, size_t len,
|
||||
char16_t* utf16_output)
|
||||
{
|
||||
return simdutf::convert_utf8_to_utf16le(buf, len, utf16_output);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf8_to_utf16be(const char* buf, size_t len,
|
||||
char16_t* utf16_output)
|
||||
{
|
||||
return simdutf::convert_utf8_to_utf16be(buf, len, utf16_output);
|
||||
}
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf8_to_utf16le_with_errors(const char* buf, size_t len,
|
||||
char16_t* utf16_output)
|
||||
{
|
||||
auto res = simdutf::convert_utf8_to_utf16le_with_errors(buf, len, utf16_output);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf8_to_utf16be_with_errors(const char* buf, size_t len,
|
||||
char16_t* utf16_output)
|
||||
{
|
||||
auto res = simdutf::convert_utf8_to_utf16be_with_errors(buf, len, utf16_output);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
size_t simdutf__convert_valid_utf8_to_utf16le(const char* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf8_to_utf16le(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf8_to_utf16be(const char* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf8_to_utf16be(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf8_to_utf32(const char* buf, size_t len,
|
||||
char32_t* utf32_output)
|
||||
{
|
||||
return simdutf::convert_utf8_to_utf32(buf, len, utf32_output);
|
||||
}
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf8_to_utf32_with_errors(const char* buf, size_t len,
|
||||
char32_t* utf32_output)
|
||||
{
|
||||
auto res = simdutf::convert_utf8_to_utf32_with_errors(buf, len, utf32_output);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf8_to_utf32(const char* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf8_to_utf32(buf, len, utf32_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf16le_to_utf8(const char16_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_utf16le_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf16be_to_utf8(const char16_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_utf16be_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
SIMDUTFResult simdutf__convert_utf16le_to_utf8_with_errors(const char16_t* buf,
|
||||
size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__convert_utf16be_to_utf8_with_errors(const char16_t* buf,
|
||||
size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf16be_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf32_to_utf8(const char32_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_utf32_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
|
||||
SIMDUTFResult simdutf__convert_utf32_to_utf8_with_errors(const char32_t* buf,
|
||||
size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf32_to_utf8_with_errors(buf, len, utf8_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf32_to_utf8(const char32_t* buf, size_t len,
|
||||
char* utf8_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf32_to_utf8(buf, len, utf8_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf32_to_utf16le(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_utf32_to_utf16le(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf32_to_utf16be(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_utf32_to_utf16be(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf32_to_utf16le(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len,
|
||||
char16_t* utf16_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf32_to_utf16be(buf, len, utf16_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf16le_to_utf32(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
return simdutf::convert_utf16le_to_utf32(buf, len, utf32_buffer);
|
||||
}
|
||||
|
||||
size_t simdutf__convert_utf16be_to_utf32(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
return simdutf::convert_utf16be_to_utf32(buf, len, utf32_buffer);
|
||||
}
|
||||
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
SIMDUTFResult
|
||||
simdutf__convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
auto res = simdutf::convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer);
|
||||
return { res.error, res.count };
|
||||
}
|
||||
|
||||
size_t simdutf__convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf16le_to_utf32(buf, len, utf32_buffer);
|
||||
}
|
||||
size_t simdutf__convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len,
|
||||
char32_t* utf32_buffer)
|
||||
{
|
||||
return simdutf::convert_valid_utf16be_to_utf32(buf, len, utf32_buffer);
|
||||
}
|
||||
void simdutf__change_endianness_utf16(const char16_t* buf, size_t length,
|
||||
char16_t* output)
|
||||
{
|
||||
simdutf::change_endianness_utf16(buf, length, output);
|
||||
}
|
||||
|
||||
size_t simdutf__count_utf16le(const char16_t* buf, size_t length)
|
||||
{
|
||||
return simdutf::count_utf16le(buf, length);
|
||||
}
|
||||
|
||||
size_t simdutf__count_utf16be(const char16_t* buf, size_t length)
|
||||
{
|
||||
return simdutf::count_utf16be(buf, length);
|
||||
}
|
||||
|
||||
size_t simdutf__count_utf8(const char* buf, size_t length)
|
||||
{
|
||||
return simdutf::count_utf8(buf, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf8_length_from_utf16le(const char16_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf8_length_from_utf16le(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf8_length_from_utf16be(const char16_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf8_length_from_utf16be(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf32_length_from_utf16le(const char16_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf32_length_from_utf16le(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf32_length_from_utf16be(const char16_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf32_length_from_utf16be(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf16_length_from_utf8(const char* input, size_t length)
|
||||
{
|
||||
return simdutf::utf16_length_from_utf8(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf8_length_from_utf32(const char32_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf8_length_from_utf32(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf16_length_from_utf32(const char32_t* input, size_t length)
|
||||
{
|
||||
return simdutf::utf16_length_from_utf32(input, length);
|
||||
}
|
||||
|
||||
size_t simdutf__utf32_length_from_utf8(const char* input, size_t length)
|
||||
{
|
||||
return simdutf::utf32_length_from_utf8(input, length);
|
||||
}
|
||||
}
|
||||
346
src/bun.js/bindings/bun-simdutf.zig
Normal file
346
src/bun.js/bindings/bun-simdutf.zig
Normal file
@@ -0,0 +1,346 @@
|
||||
/// Return type of the `*_with_errors` bindings. Laid out as an extern struct
/// with a 32-bit status followed by a `usize` count, matching the
/// `{ int error; size_t count; }` struct returned by the C++ wrappers.
pub const SIMDUTFResult = extern struct {
    status: Status,
    count: usize = 0,

    /// Error codes reported in `status`. The enum is non-exhaustive, so values
    /// other than the named ones are representable.
    pub const Status = enum(i32) {
        success = 0,

        /// Any byte must have fewer than 5 header bits.
        header_bits = 1,

        /// The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length
        /// This is also the error when the input is truncated.
        too_short = 2,

        /// The leading byte must not be a continuation byte.
        too_long = 3,

        /// The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
        /// and U+FFFF for four-byte characters.
        overlong = 4,

        /// The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII.
        too_large = 5,

        /// The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
        /// a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16)
        surrogate = 6,

        // Any other value: not related to validation/transcoding.
        _,
    };
};
|
||||
pub extern fn simdutf__detect_encodings(input: [*]const u8, length: usize) c_int;
|
||||
pub extern fn simdutf__validate_utf8(buf: [*]const u8, len: usize) bool;
|
||||
pub extern fn simdutf__validate_utf8_with_errors(buf: [*]const u8, len: usize) SIMDUTFResult;
|
||||
pub extern fn simdutf__validate_ascii(buf: [*]const u8, len: usize) bool;
|
||||
pub extern fn simdutf__validate_ascii_with_errors(buf: [*]const u8, len: usize) SIMDUTFResult;
|
||||
pub extern fn simdutf__validate_utf16le(buf: [*]const u16, len: usize) bool;
|
||||
pub extern fn simdutf__validate_utf16be(buf: [*]const u16, len: usize) bool;
|
||||
pub extern fn simdutf__validate_utf16le_with_errors(buf: [*]const u16, len: usize) SIMDUTFResult;
|
||||
pub extern fn simdutf__validate_utf16be_with_errors(buf: [*]const u16, len: usize) SIMDUTFResult;
|
||||
pub extern fn simdutf__validate_utf32(buf: [*c]const c_uint, len: usize) bool;
|
||||
pub extern fn simdutf__validate_utf32_with_errors(buf: [*c]const c_uint, len: usize) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_utf8_to_utf16le(buf: [*]const u8, len: usize, utf16_output: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf8_to_utf16be(buf: [*]const u8, len: usize, utf16_output: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf8_to_utf16le_with_errors(buf: [*]const u8, len: usize, utf16_output: [*]u16) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_utf8_to_utf16be_with_errors(buf: [*]const u8, len: usize, utf16_output: [*]u16) SIMDUTFResult;
|
||||
// The little-endian variant is exported by bun-simdutf.cpp with the same
// signature as the big-endian one; declare both so either can be called.
pub extern fn simdutf__convert_valid_utf8_to_utf16le(buf: [*]const u8, len: usize, utf16_buffer: [*]u16) usize;
pub extern fn simdutf__convert_valid_utf8_to_utf16be(buf: [*]const u8, len: usize, utf16_buffer: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf8_to_utf32(buf: [*]const u8, len: usize, utf32_output: [*]u32) usize;
|
||||
pub extern fn simdutf__convert_utf8_to_utf32_with_errors(buf: [*]const u8, len: usize, utf32_output: [*]u32) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_valid_utf8_to_utf32(buf: [*]const u8, len: usize, utf32_buffer: [*]u32) usize;
|
||||
pub extern fn simdutf__convert_utf16le_to_utf8(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_utf16be_to_utf8(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_utf16le_to_utf8_with_errors(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_utf16be_to_utf8_with_errors(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_valid_utf16le_to_utf8(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_valid_utf16be_to_utf8(buf: [*]const u16, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_utf32_to_utf8(buf: [*c]const c_uint, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_utf32_to_utf8_with_errors(buf: [*c]const c_uint, len: usize, utf8_buffer: [*]u8) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_valid_utf32_to_utf8(buf: [*c]const c_uint, len: usize, utf8_buffer: [*]u8) usize;
|
||||
pub extern fn simdutf__convert_utf32_to_utf16le(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf32_to_utf16be(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf32_to_utf16le_with_errors(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_utf32_to_utf16be_with_errors(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_valid_utf32_to_utf16le(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_valid_utf32_to_utf16be(buf: [*c]const c_uint, len: usize, utf16_buffer: [*]u16) usize;
|
||||
pub extern fn simdutf__convert_utf16le_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize;
|
||||
pub extern fn simdutf__convert_utf16be_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize;
|
||||
pub extern fn simdutf__convert_utf16le_to_utf32_with_errors(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_utf16be_to_utf32_with_errors(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) SIMDUTFResult;
|
||||
pub extern fn simdutf__convert_valid_utf16le_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize;
|
||||
pub extern fn simdutf__convert_valid_utf16be_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize;
|
||||
pub extern fn simdutf__change_endianness_utf16(buf: [*]const u16, length: usize, output: [*]u16) void;
|
||||
pub extern fn simdutf__count_utf16le(buf: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__count_utf16be(buf: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__count_utf8(buf: [*]const u8, length: usize) usize;
|
||||
pub extern fn simdutf__utf8_length_from_utf16le(input: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__utf8_length_from_utf16be(input: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__utf32_length_from_utf16le(input: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__utf32_length_from_utf16be(input: [*]const u16, length: usize) usize;
|
||||
pub extern fn simdutf__utf16_length_from_utf8(input: [*]const u8, length: usize) usize;
|
||||
pub extern fn simdutf__utf8_length_from_utf32(input: [*c]const c_uint, length: usize) usize;
|
||||
pub extern fn simdutf__utf16_length_from_utf32(input: [*c]const c_uint, length: usize) usize;
|
||||
pub extern fn simdutf__utf32_length_from_utf8(input: [*]const u8, length: usize) usize;
|
||||
|
||||
/// Slice-based front end for the `simdutf__validate_*` bindings: each function
/// passes the slice's pointer and length straight through.
pub const validate = struct {
    /// Result of `simdutf__validate_utf8` for `input`.
    pub fn utf8(input: []const u8) bool {
        return simdutf__validate_utf8(input.ptr, input.len);
    }

    /// Result of `simdutf__validate_ascii` for `input`.
    pub fn ascii(input: []const u8) bool {
        return simdutf__validate_ascii(input.ptr, input.len);
    }

    /// Result of `simdutf__validate_utf16le` for `input`.
    pub fn utf16le(input: []const u16) bool {
        return simdutf__validate_utf16le(input.ptr, input.len);
    }

    /// Result of `simdutf__validate_utf16be` for `input`.
    pub fn utf16be(input: []const u16) bool {
        return simdutf__validate_utf16be(input.ptr, input.len);
    }

    /// Same checks as above, but returning a `SIMDUTFResult` (status + count)
    /// from the `*_with_errors` bindings instead of a bool.
    pub const with_errors = struct {
        pub fn utf8(input: []const u8) SIMDUTFResult {
            return simdutf__validate_utf8_with_errors(input.ptr, input.len);
        }

        pub fn ascii(input: []const u8) SIMDUTFResult {
            return simdutf__validate_ascii_with_errors(input.ptr, input.len);
        }

        pub fn utf16le(input: []const u16) SIMDUTFResult {
            return simdutf__validate_utf16le_with_errors(input.ptr, input.len);
        }

        pub fn utf16be(input: []const u16) SIMDUTFResult {
            return simdutf__validate_utf16be_with_errors(input.ptr, input.len);
        }
    };
};
|
||||
|
||||
/// Slice-based front end for the `simdutf__convert_*` bindings, addressed as
/// `convert.<source>.to.<target>[.with_errors].{le,be}`.
///
/// Only `output.ptr` is forwarded; `output.len` is never passed to the
/// binding, so these wrappers perform no bounds check on the destination.
pub const convert = struct {
    pub const utf8 = struct {
        pub const to = struct {
            pub const utf16 = struct {
                // Unlike the other plain `le`/`be` functions in this file,
                // these two call the non-`valid_` converters.
                pub fn le(input: []const u8, output: []u16) usize {
                    return simdutf__convert_utf8_to_utf16le(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u8, output: []u16) usize {
                    return simdutf__convert_utf8_to_utf16be(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u8, output: []u16) SIMDUTFResult {
                        return simdutf__convert_utf8_to_utf16le_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u8, output: []u16) SIMDUTFResult {
                        return simdutf__convert_utf8_to_utf16be_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };

            pub const utf32 = struct {
                // `le` and `be` call the same binding here.
                pub fn le(input: []const u8, output: []u32) usize {
                    return simdutf__convert_valid_utf8_to_utf32(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u8, output: []u32) usize {
                    return simdutf__convert_valid_utf8_to_utf32(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u8, output: []u32) SIMDUTFResult {
                        return simdutf__convert_utf8_to_utf32_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u8, output: []u32) SIMDUTFResult {
                        return simdutf__convert_utf8_to_utf32_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };
        };
    };

    pub const utf16 = struct {
        pub const to = struct {
            pub const utf8 = struct {
                // The plain variants use the `convert_valid_*` bindings.
                pub fn le(input: []const u16, output: []u8) usize {
                    return simdutf__convert_valid_utf16le_to_utf8(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u16, output: []u8) usize {
                    return simdutf__convert_valid_utf16be_to_utf8(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u16, output: []u8) SIMDUTFResult {
                        return simdutf__convert_utf16le_to_utf8_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u16, output: []u8) SIMDUTFResult {
                        return simdutf__convert_utf16be_to_utf8_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };

            pub const utf32 = struct {
                pub fn le(input: []const u16, output: []u32) usize {
                    return simdutf__convert_valid_utf16le_to_utf32(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u16, output: []u32) usize {
                    return simdutf__convert_valid_utf16be_to_utf32(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u16, output: []u32) SIMDUTFResult {
                        return simdutf__convert_utf16le_to_utf32_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u16, output: []u32) SIMDUTFResult {
                        return simdutf__convert_utf16be_to_utf32_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };
        };
    };

    pub const utf32 = struct {
        pub const to = struct {
            pub const utf8 = struct {
                // `le` and `be` call the same binding here.
                pub fn le(input: []const u32, output: []u8) usize {
                    return simdutf__convert_valid_utf32_to_utf8(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u32, output: []u8) usize {
                    return simdutf__convert_valid_utf32_to_utf8(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u32, output: []u8) SIMDUTFResult {
                        return simdutf__convert_utf32_to_utf8_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u32, output: []u8) SIMDUTFResult {
                        return simdutf__convert_utf32_to_utf8_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };

            pub const utf16 = struct {
                pub fn le(input: []const u32, output: []u16) usize {
                    return simdutf__convert_valid_utf32_to_utf16le(input.ptr, input.len, output.ptr);
                }

                pub fn be(input: []const u32, output: []u16) usize {
                    return simdutf__convert_valid_utf32_to_utf16be(input.ptr, input.len, output.ptr);
                }

                pub const with_errors = struct {
                    pub fn le(input: []const u32, output: []u16) SIMDUTFResult {
                        return simdutf__convert_utf32_to_utf16le_with_errors(input.ptr, input.len, output.ptr);
                    }

                    pub fn be(input: []const u32, output: []u16) SIMDUTFResult {
                        return simdutf__convert_utf32_to_utf16be_with_errors(input.ptr, input.len, output.ptr);
                    }
                };
            };
        };
    };
};
|
||||
|
||||
/// Slice-based front end for the `simdutf__*_length_from_*` bindings,
/// addressed as `length.<target>.from.<source>`.
pub const length = struct {
    pub const utf8 = struct {
        pub const from = struct {
            pub fn utf32(input: []const u32) usize {
                return simdutf__utf8_length_from_utf32(input.ptr, input.len);
            }

            pub const utf16 = struct {
                pub fn le(input: []const u16) usize {
                    return simdutf__utf8_length_from_utf16le(input.ptr, input.len);
                }

                pub fn be(input: []const u16) usize {
                    return simdutf__utf8_length_from_utf16be(input.ptr, input.len);
                }
            };
        };
    };

    pub const utf16 = struct {
        pub const from = struct {
            pub fn utf32(input: []const u32) usize {
                return simdutf__utf16_length_from_utf32(input.ptr, input.len);
            }

            // `le` and `be` call the same binding here.
            pub const utf8 = struct {
                pub fn le(input: []const u8) usize {
                    return simdutf__utf16_length_from_utf8(input.ptr, input.len);
                }

                pub fn be(input: []const u8) usize {
                    return simdutf__utf16_length_from_utf8(input.ptr, input.len);
                }
            };
        };
    };

    pub const utf32 = struct {
        pub const from = struct {
            // `le` and `be` call the same binding here.
            pub const utf8 = struct {
                pub fn le(input: []const u8) usize {
                    return simdutf__utf32_length_from_utf8(input.ptr, input.len);
                }

                pub fn be(input: []const u8) usize {
                    return simdutf__utf32_length_from_utf8(input.ptr, input.len);
                }
            };

            pub const utf16 = struct {
                pub fn le(input: []const u16) usize {
                    return simdutf__utf32_length_from_utf16le(input.ptr, input.len);
                }

                pub fn be(input: []const u16) usize {
                    return simdutf__utf32_length_from_utf16be(input.ptr, input.len);
                }
            };
        };
    };
};
|
||||
|
||||
/// Helpers that drop a trailing, visibly incomplete code point from a buffer
/// by inspecting only its last few code units.
pub const trim = struct {
    /// Length of `buf` after removing a trailing incomplete UTF-8 sequence.
    /// Looks at no more than the final three bytes; each check is skipped
    /// when the buffer is too short to contain that byte.
    pub fn utf8_len(buf: []const u8) usize {
        const n = buf.len;

        // Last byte is a lead byte (2-, 3- or 4-byte sequence) with nothing after it.
        if (n >= 1 and buf[n - 1] >= 0b11000000) return n - 1;

        // Second-to-last byte starts a 3- or 4-byte sequence: only 2 bytes present.
        if (n >= 2 and buf[n - 2] >= 0b11100000) return n - 2;

        // Third-to-last byte starts a 4-byte sequence: only 3 bytes present.
        if (n >= 3 and buf[n - 3] >= 0b11110000) return n - 3;

        return n;
    }

    /// Length of `buf` after removing a trailing code unit in the high
    /// surrogate range 0xD800...0xDBFF. An empty buffer yields 0.
    pub fn utf16_len(buf: []const u16) usize {
        if (buf.len == 0) return 0;

        const last = buf[buf.len - 1];
        const ends_with_high_surrogate = last >= 0xD800 and last <= 0xDBFF;
        return if (ends_with_high_surrogate) buf.len - 1 else buf.len;
    }

    /// `buf` truncated to `utf16_len(buf)`.
    pub fn utf16(buf: []const u16) []const u16 {
        const keep = utf16_len(buf);
        return buf[0..keep];
    }

    /// `buf` truncated to `utf8_len(buf)`.
    pub fn utf8(buf: []const u8) []const u8 {
        const keep = utf8_len(buf);
        return buf[0..keep];
    }
};
|
||||
27955
src/bun.js/bindings/simdutf.cpp
vendored
Normal file
27955
src/bun.js/bindings/simdutf.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2435
src/bun.js/bindings/simdutf.h
vendored
Normal file
2435
src/bun.js/bindings/simdutf.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@@ -111,3 +111,5 @@ pub const hardcode_localhost_to_127_0_0_1 = true;
|
||||
/// But it's very easy to end up importing it accidentally, causing an error at runtime
|
||||
/// so we just disable it
|
||||
pub const support_jsxs_in_jsx_transform = false;
|
||||
|
||||
/// Gates the simdutf-backed code paths: the UTF-8 <-> UTF-16 conversion and
/// length helpers check this flag before calling into `bun.simdutf`.
pub const use_simdutf = true;
|
||||
|
||||
@@ -482,3 +482,5 @@ pub fn rangeOfSliceInBuffer(slice: []const u8, buffer: []const u8) ?[2]u32 {
|
||||
}
|
||||
|
||||
/// The largest value representable by `FileDescriptor`.
/// NOTE(review): presumably used as a "no descriptor" marker — confirm against callers.
pub const invalid_fd = std.math.maxInt(FileDescriptor);
|
||||
|
||||
/// Re-export of `./bun.js/bindings/bun-simdutf.zig` so callers can write `bun.simdutf`.
/// NOTE(review): presumably the Zig bindings for the vendored simdutf library — confirm.
pub const simdutf = @import("./bun.js/bindings/bun-simdutf.zig");
|
||||
|
||||
@@ -935,7 +935,36 @@ const strings = @This();
|
||||
/// If there are no non-ascii characters, this returns null
|
||||
/// This is intended to be used for strings that go to JavaScript
|
||||
pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 {
|
||||
if (strings.firstNonASCII(bytes)) |i| {
|
||||
var first_non_ascii: ?u32 = null;
|
||||
|
||||
if (bun.FeatureFlags.use_simdutf) {
|
||||
if (bytes.len == 0)
|
||||
return &[_]u16{};
|
||||
|
||||
if (bun.simdutf.validate.ascii(bytes))
|
||||
return null;
|
||||
|
||||
const trimmed = bun.simdutf.trim.utf8(bytes);
|
||||
const out_length = bun.simdutf.length.utf16.from.utf8.le(trimmed);
|
||||
var out = try allocator.alloc(u16, out_length);
|
||||
|
||||
const result = bun.simdutf.convert.utf8.to.utf16.with_errors.le(trimmed, out);
|
||||
switch (result.status) {
|
||||
.success => {
|
||||
return out;
|
||||
},
|
||||
else => {
|
||||
if (fail_if_invalid) {
|
||||
allocator.free(out);
|
||||
return error.InvalidUTF8;
|
||||
}
|
||||
|
||||
first_non_ascii = @truncate(u32, result.count);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if (first_non_ascii orelse strings.firstNonASCII(bytes)) |i| {
|
||||
const ascii = bytes[0..i];
|
||||
const chunk = bytes[i..];
|
||||
var output = try std.ArrayList(u16).initCapacity(allocator, ascii.len + 2);
|
||||
@@ -1054,12 +1083,31 @@ pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement {
|
||||
}
|
||||
|
||||
/// Converts `utf16` to UTF-8 in memory obtained from `allocator` and returns
/// the written bytes.
///
/// When `bun.FeatureFlags.use_simdutf` is set and `Type` is `[]const u16`, the
/// output size is computed up front and the conversion is done by simdutf.
/// Otherwise the work is delegated to `toUTF8ListWithType`.
///
/// Returns an allocation error if the buffer cannot be allocated.
pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // `list` must be mutable: its length is bumped after the conversion.
        var list = try std.ArrayList(u8).initCapacity(allocator, length);
        // The list is empty here, so the conversion writes into its spare capacity.
        // NOTE(review): if the converter reports 0 bytes written for invalid
        // UTF-16, this returns an empty slice — confirm that is acceptable.
        list.items.len += bun.simdutf.convert.utf16.to.utf8.le(utf16, list.items.ptr[0..length]);
        // The function returns `[]u8`, so hand back the slice, not the ArrayList.
        return list.items;
    }

    var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
    list = try toUTF8ListWithType(list, Type, utf16);
    return list.items;
}
|
||||
|
||||
/// Converts `utf16` to UTF-8, appending the bytes to `list_`, and returns the
/// updated list. The list is taken by value, so callers must use the returned
/// copy.
///
/// When `bun.FeatureFlags.use_simdutf` is set and `Type` is `[]const u16`, the
/// conversion is done by simdutf. Otherwise it is delegated to
/// `toUTF8ListWithTypeBun`.
///
/// Returns an allocation error if the list cannot grow.
pub fn toUTF8ListWithType(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
        var list = list_;
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // Reserve room for the new bytes on top of what the list already
        // holds, so existing contents are never overwritten and `items.len`
        // cannot end up larger than the capacity.
        const start = list.items.len;
        try list.ensureTotalCapacityPrecise(start + length);
        // Read `items.ptr` only after the reservation: growing may move the buffer.
        const written = bun.simdutf.convert.utf16.to.utf8.le(utf16, list.items.ptr[start .. start + length]);
        list.items.len += written;
        return list;
    }

    return toUTF8ListWithTypeBun(list_, Type, utf16);
}
|
||||
|
||||
pub fn toUTF8ListWithTypeBun(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
|
||||
var list = list_;
|
||||
var utf16_remaining = utf16;
|
||||
|
||||
@@ -2288,6 +2336,20 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
|
||||
var utf16_remaining = utf16;
|
||||
var ended_on_non_ascii = false;
|
||||
|
||||
if (comptime Type == []const u16) {
|
||||
if (bun.FeatureFlags.use_simdutf) {
|
||||
const trimmed = bun.simdutf.trim.utf16(utf16_remaining);
|
||||
const out_len = bun.simdutf.length.utf8.from.utf16.le(trimmed);
|
||||
if (remaining.len >= out_len) {
|
||||
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining[0..out_len]);
|
||||
return EncodeIntoResult{
|
||||
.read = @truncate(u32, trimmed.len),
|
||||
.written = @truncate(u32, result.count),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (firstNonASCII16(Type, utf16_remaining)) |i| {
|
||||
const end = @minimum(i, remaining.len);
|
||||
if (end > 0) copyU16IntoU8(remaining, Type, utf16_remaining[0..end]);
|
||||
@@ -2324,6 +2386,10 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
|
||||
}
|
||||
|
||||
pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
|
||||
if (bun.FeatureFlags.use_simdutf) {
|
||||
return bun.simdutf.length.utf8.from.utf16.le(utf16);
|
||||
}
|
||||
|
||||
var utf16_remaining = utf16;
|
||||
var count: usize = 0;
|
||||
|
||||
@@ -2345,6 +2411,10 @@ pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize {
|
||||
var utf8_remaining = utf8;
|
||||
var count: usize = 0;
|
||||
|
||||
if (bun.FeatureFlags.use_simdutf) {
|
||||
return bun.simdutf.length.utf16.from.utf8.le(utf8);
|
||||
}
|
||||
|
||||
while (firstNonASCII(utf8_remaining)) |i| {
|
||||
count += i;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { expect, it, describe } from "bun:test";
|
||||
import { gc as gcTrace } from "./gc";
|
||||
import { gc as gcTrace, withoutAggressiveGC } from "./gc";
|
||||
|
||||
const getByteLength = (str) => {
|
||||
// returns the byte length of an utf8 string
|
||||
@@ -74,10 +74,12 @@ describe("TextDecoder", () => {
|
||||
|
||||
it("DOMJIT call", () => {
|
||||
const array = new Uint8Array(bytes.buffer);
|
||||
for (let i = 0; i < 100_000; i++) {
|
||||
const decoded = decoder.decode(array);
|
||||
expect(decoded).toBe(text);
|
||||
}
|
||||
withoutAggressiveGC(() => {
|
||||
for (let i = 0; i < 100_000; i++) {
|
||||
const decoded = decoder.decode(array);
|
||||
expect(decoded).toBe(text);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user