From d861347dc5c92050fe533aaa04b5c0f73af9193f Mon Sep 17 00:00:00 2001
From: Jarred Sumner <jarred@jarredsumner.com>
Date: Sat, 10 Aug 2024 02:13:36 -0700
Subject: [PATCH] Optimize TextEncoderStream, part 1 (#13222)

---
 bench/snippets/text-encoder-stream.mjs |  61 +++++++++
 src/bun.js/base.zig                    |   5 +
 src/bun.js/bindings/Uint8Array.cpp     |  28 ++++
 src/bun.js/bindings/bindings.zig       |   6 +
 src/bun.js/webcore/encoding.zig        | 170 ++++++++++---------------
 src/bun.zig                            |   2 +-
 src/string_builder.zig                 |   2 +-
 src/string_immutable.zig               |  28 +++-
 8 files changed, 190 insertions(+), 112 deletions(-)
 create mode 100644 bench/snippets/text-encoder-stream.mjs
 create mode 100644 src/bun.js/bindings/Uint8Array.cpp

diff --git a/bench/snippets/text-encoder-stream.mjs b/bench/snippets/text-encoder-stream.mjs
new file mode 100644
index 0000000000..8fe4ed621b
--- /dev/null
+++ b/bench/snippets/text-encoder-stream.mjs
@@ -0,0 +1,61 @@
+import { bench, run } from "./runner.mjs";
+
+const latin1 = `hello hello hello!!!!`.repeat(102400).split("").join(""); // 21 ASCII chars x 102400 repeats = 2,150,400 code units
+
+/**
+ * Registers four TextEncoderStream benchmarks for `src`: the same text is fed
+ * through the stream in chunks of 256, 1024, 4096 and 16384 code units.
+ */
+function create(src) {
+  // Cuts `text` into consecutive pieces of at most `chunkSize` code units.
+  // Every piece is passed through split("").join("") (NOTE(review):
+  // presumably so each chunk is a freshly built string rather than a slice
+  // of `text` — confirm).
+  function split(text, chunkSize) {
+    const pieces = [];
+    for (let offset = 0; offset < text.length; offset += chunkSize) {
+      const piece = text.slice(offset, offset + chunkSize);
+      pieces.push(piece.split("").join(""));
+    }
+    return pieces;
+  }
+
+  // [label used in the benchmark name, pre-split chunks]
+  const variants = [
+    ["0.25", split(src, 256)],
+    ["1", split(src, 1024)],
+    ["4", split(src, 4096)],
+    ["16", split(src, 16 * 1024)],
+  ];
+
+  // Enqueues every chunk from one pull() call, pipes the stream through a
+  // TextEncoderStream and reads the output until the reader reports `done`.
+  // The encoded values are discarded.
+  async function runBench(chunks) {
+    const encoderStream = new TextEncoderStream();
+    const source = new ReadableStream({
+      pull(controller) {
+        chunks.forEach(chunk => controller.enqueue(chunk));
+        controller.close();
+      },
+    });
+    const reader = source.pipeThrough(encoderStream).getReader();
+
+    let finished = false;
+    while (!finished) {
+      finished = (await reader.read()).done;
+    }
+  }
+
+  const sizeInKB = (src.length / 1024) | 0;
+  for (const [label, chunks] of variants) {
+    bench(`${sizeInKB} KB of HTML in ${label} KB chunks`, async () => {
+      await runBench(chunks);
+    });
+  }
+}
+
+create(latin1); // the synthetic ASCII input defined above
+create(await fetch("https://bun.sh").then(res => res.text())); // a page fetched at startup; requires network access
+
+await run();
diff --git a/src/bun.js/base.zig b/src/bun.js/base.zig
index ec0ed774a0..b3e89c575c 100644
--- a/src/bun.js/base.zig
+++ b/src/bun.js/base.zig
@@ -405,6 +405,11 @@ pub const ArrayBuffer = extern struct {
         return Bun__createUint8ArrayForCopy(globalThis, bytes.ptr, bytes.len, true);
     }
 
+    pub fn createUint8Array(globalThis: *JSC.JSGlobalObject, bytes: []const u8) JSValue {
+        JSC.markBinding(@src());
+        return Bun__createUint8ArrayForCopy(globalThis, bytes.ptr, bytes.len, false); // same call as the function above, but with the last flag false; NOTE(review): presumably selects a plain Uint8Array instead of a Buffer — confirm
+    }
+
     extern "C" fn Bun__allocUint8ArrayForCopy(*JSC.JSGlobalObject, usize, **anyopaque) JSValue;
     pub fn allocBuffer(globalThis: *JSC.JSGlobalObject, len: usize) struct { JSValue, []u8 } {
         var ptr: [*]u8 = undefined;
diff --git a/src/bun.js/bindings/Uint8Array.cpp b/src/bun.js/bindings/Uint8Array.cpp
new file mode 100644
index 0000000000..c1e3708b07
--- /dev/null
+++ b/src/bun.js/bindings/Uint8Array.cpp
@@ -0,0 +1,28 @@
+#include "root.h"
+
+#include "JavaScriptCore/TypedArrayType.h"
+#include "JavaScriptCore/JSArrayBufferViewInlines.h"
+#include "JavaScriptCore/JSArrayBufferView.h"
+#include "JavaScriptCore/JSTypedArrayViewPrototype.h"
+#include "mimalloc.h"
+
+namespace Bun {
+
+extern "C" JSC::EncodedJSValue JSUint8Array__fromDefaultAllocator(JSC::JSGlobalObject* lexicalGlobalObject, uint8_t* ptr, size_t length)
+{
+    // Wraps `ptr` in a JSUint8Array; mi_free() is registered as the destructor task for the wrapped bytes.
+    auto* structure = lexicalGlobalObject->m_typedArrayUint8.get(lexicalGlobalObject);
+
+    // Empty input: return a zero-length array. `ptr` is neither wrapped nor freed on this path.
+    if (UNLIKELY(length == 0))
+        return JSC::JSValue::encode(JSC::JSUint8Array::create(lexicalGlobalObject, structure, 0));
+
+    auto releaseBytes = createSharedTask<void(void*)>([](void* bytes) {
+        mi_free(bytes);
+    });
+    auto buffer = ArrayBuffer::createFromBytes({ ptr, length }, WTFMove(releaseBytes));
+
+    JSC::JSUint8Array* uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, structure, WTFMove(buffer), 0, length);
+    return JSC::JSValue::encode(uint8Array);
+}
+}
\ No newline at end of file
diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig
index a130a7b046..f2f7493027 100644
--- a/src/bun.js/bindings/bindings.zig
+++ b/src/bun.js/bindings/bindings.zig
@@ -1705,6 +1705,12 @@ pub const JSUint8Array = opaque {
     pub fn slice(this: *JSUint8Array) []u8 {
         return this.ptr()[0..this.len()];
     }
+
+    extern fn JSUint8Array__fromDefaultAllocator(*JSC.JSGlobalObject, ptr: [*]u8, len: usize) JSC.JSValue;
+    /// Returns a JS Uint8Array over *bytes* (no copy is made here). *bytes* must come from bun.default_allocator and must not be freed by the caller afterwards. NOTE(review): for a zero-length slice the native side does not take the pointer — confirm callers never pass len 0 with spare capacity.
+    pub fn fromBytes(globalThis: *JSGlobalObject, bytes: []u8) JSC.JSValue {
+        return JSUint8Array__fromDefaultAllocator(globalThis, bytes.ptr, bytes.len);
+    }
 };
 
 pub const JSCell = extern struct {
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 77e2816865..f069f7326f 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -452,32 +452,58 @@ pub const TextEncoderStreamEncoder = struct {
 
         if (input.len == 0) return .undefined;
 
-        const prepend_replacement = prepend_replacement: {
+        const prepend_replacement_len: usize = prepend_replacement: {
             if (this.pending_lead_surrogate != null) {
                 this.pending_lead_surrogate = null;
                 // no latin1 surrogate pairs
-                break :prepend_replacement true;
+                break :prepend_replacement 3;
             }
 
-            break :prepend_replacement false;
+            break :prepend_replacement 0;
         };
-
-        const length: usize = bun.simdutf.length.utf8.from.latin1(input) + @as(usize, if (prepend_replacement) 3 else 0);
-
-        const array_value, const bytes = ArrayBuffer.allocBuffer(globalObject, length);
-
-        var remain = bytes;
-
-        if (prepend_replacement) {
-            @memcpy(remain[0..3], &[3]u8{ 0xef, 0xbf, 0xbd });
-            remain = remain[3..];
+        // In a previous profile, counting the UTF-8 length took about as long as the conversion itself:
+        //
+        // Benchmark	Time %	CPU (ns)	Iterations	Ratio
+        // 288.00 ms   13.5%	288.00 ms	 	  simdutf::arm64::implementation::convert_latin1_to_utf8(char const*, unsigned long, char*) const
+        // 278.00 ms   13.0%	278.00 ms	 	  simdutf::arm64::implementation::utf8_length_from_latin1(char const*, unsigned long) const
+        //
+        // So skip the counting pass: reserve room for the all-ASCII case and grow the buffer only when needed.
+        var buffer = std.ArrayList(u8).initCapacity(bun.default_allocator, input.len + prepend_replacement_len) catch {
+            globalObject.throwOutOfMemory();
+            return .zero;
+        };
+        if (prepend_replacement_len > 0) {
+            buffer.appendSliceAssumeCapacity(&[3]u8{ 0xef, 0xbf, 0xbd });
         }
 
-        const count = bun.simdutf.convert.latin1.to.utf8(input, remain);
+        var remain = input;
+        while (remain.len > 0) {
+            const result = strings.copyLatin1IntoUTF8(buffer.unusedCapacitySlice(), []const u8, remain);
 
-        bun.debugAssert(count == remain.len);
+            buffer.items.len += result.written;
+            remain = remain[result.read..];
 
-        return array_value;
+            if (result.written == 0 and result.read == 0) {
+                buffer.ensureUnusedCapacity(2) catch {
+                    buffer.deinit();
+                    globalObject.throwOutOfMemory();
+                    return .zero;
+                };
+            } else if (buffer.items.len == buffer.capacity and remain.len > 0) {
+                buffer.ensureTotalCapacity(buffer.items.len + remain.len + 1) catch {
+                    buffer.deinit();
+                    globalObject.throwOutOfMemory();
+                    return .zero;
+                };
+            }
+        }
+
+        if (comptime Environment.isDebug) {
+            // wrap in comptime if so simdutf isn't called in a release build here.
+            bun.debugAssert(buffer.items.len == (bun.simdutf.length.utf8.from.latin1(input) + prepend_replacement_len));
+        }
+
+        return JSC.JSUint8Array.fromBytes(globalObject, buffer.items);
     }
 
     fn encodeUTF16(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, input: []const u16) JSValue {
@@ -526,107 +552,49 @@ pub const TextEncoderStreamEncoder = struct {
             break :prepend null;
         };
 
-        // TODO: use ExternalArrayBuffer and skip validation pass
-        const validate_result = bun.simdutf.validate.with_errors.utf16le(remain);
-        if (validate_result.status == .success) {
-            const len = bun.simdutf.length.utf8.from.utf16.le(remain);
-            if (len == 0) return .undefined;
-
-            const array_value, var bytes = ArrayBuffer.allocBuffer(globalObject, len + if (prepend) |pre| pre.len else 0);
-            if (array_value.isEmpty()) {
-                return .zero;
-            }
-
-            if (prepend) |pre| {
-                @memcpy(bytes[0..pre.len], pre.bytes[0..pre.len]);
-                bytes = bytes[pre.len..];
-            }
-
-            const convert_result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remain, bytes);
-            bun.debugAssert(convert_result.status == .success);
-
-            return array_value;
-        }
+        // Pre-size the output for the fast path: simdutf's UTF-8 length of the remaining input, plus any prepended bytes.
+        const length = bun.simdutf.length.utf8.from.utf16.le(remain);
 
         var buf = std.ArrayList(u8).initCapacity(
             bun.default_allocator,
-            validate_result.count + if (prepend) |pre| pre.len else 0,
-        ) catch bun.outOfMemory();
-        defer buf.deinit();
+            length + @as(usize, if (prepend) |pre| pre.len else 0),
+        ) catch {
+            globalObject.throwOutOfMemory();
+            return .zero;
+        };
 
-        if (prepend) |pre| {
+        if (prepend) |*pre| {
             buf.appendSliceAssumeCapacity(pre.bytes[0..pre.len]);
         }
 
-        var lead_surrogate: ?u16 = null;
+        const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remain, buf.unusedCapacitySlice());
 
-        while (strings.firstNonASCII16([]const u16, remain)) |non_ascii| {
-            const token = remain[non_ascii];
-            const ascii_slice = remain[0..non_ascii];
-            remain = remain[non_ascii + 1 ..];
+        switch (result.status) {
+            else => {
+                // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
+                const lead_surrogate = strings.toUTF8ListWithTypeBun(&buf, []const u16, remain, true) catch {
+                    buf.deinit();
+                    globalObject.throwOutOfMemory();
+                    return .zero;
+                };
 
-            if (lead_surrogate) |lead| {
-                lead_surrogate = null;
-
-                if (ascii_slice.len != 0) {
-                    // - +3 for replacement character
-                    // - it's ascii, length will be the same, just need to convert u16 -> u8
-                    buf.ensureUnusedCapacity(ascii_slice.len + 3) catch bun.outOfMemory();
-                    buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory();
-                    strings.convertUTF16ToUTF8Append(&buf, ascii_slice) catch bun.outOfMemory();
-
-                    continue;
-                }
-
-                if (strings.u16IsTrail(token)) {
-                    const converted = strings.utf16CodepointWithFFFD([]const u16, &.{ lead, token });
-                    bun.debugAssert(!converted.fail);
-
-                    const sequence = strings.wtf8Sequence(converted.code_point);
-
-                    buf.appendSlice(sequence[0..converted.utf8Width()]) catch bun.outOfMemory();
-                    continue;
-                }
-
-                buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory();
-            }
-
-            if (strings.u16IsLead(token)) {
-                if (remain.len == 0) {
-                    this.pending_lead_surrogate = token;
+                if (lead_surrogate) |pending_lead| {
+                    this.pending_lead_surrogate = pending_lead;
-                    if (buf.items.len == 0) return .undefined;
-                    return ArrayBuffer.createBuffer(globalObject, buf.items);
+                    if (buf.items.len == 0) {
+                        // Nothing to emit yet, and no Uint8Array will adopt `buf`'s allocation: free it here.
+                        buf.deinit();
+                        return .undefined;
+                    }
                 }
 
-                lead_surrogate = token;
-                continue;
-            }
-
-            bun.debugAssert(strings.u16IsTrail(token));
-
-            buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory();
+                // `buf` is not freed on this path: its bytes are handed to JSUint8Array.fromBytes.
+                return JSC.JSUint8Array.fromBytes(globalObject, buf.items);
+            },
+            .success => {
+                buf.items.len += result.count;
+                return JSC.JSUint8Array.fromBytes(globalObject, buf.items);
+            },
         }
-
-        if (lead_surrogate != null and remain.len == 0) {
-            this.pending_lead_surrogate = lead_surrogate;
-            if (buf.items.len == 0) return .undefined;
-            return ArrayBuffer.createBuffer(globalObject, buf.items);
-        }
-
-        const array_value, var bytes = ArrayBuffer.allocBuffer(globalObject, buf.items.len + remain.len + @as(usize, if (lead_surrogate != null) 3 else 0));
-        if (array_value.isEmpty()) return .zero;
-
-        @memcpy(bytes[0..buf.items.len], buf.items);
-        bytes = bytes[buf.items.len..];
-
-        if (lead_surrogate != null) {
-            @memcpy(bytes[0..3], &[3]u8{ 0xef, 0xbf, 0xbd });
-            bytes = bytes[3..];
-        }
-
-        _ = strings.convertUTF16toUTF8InBuffer(bytes, remain) catch unreachable;
-
-        return array_value;
     }
 
     pub fn flush(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, _: *JSC.CallFrame) JSValue {
diff --git a/src/bun.zig b/src/bun.zig
index 796448d794..9e4d64daf9 100644
--- a/src/bun.zig
+++ b/src/bun.zig
@@ -3330,7 +3330,7 @@ noinline fn assertionFailureWithLocation(src: std.builtin.SourceLocation) noretu
     });
 }
 
-pub inline fn debugAssert(cheap_value_only_plz: bool) void {
+pub fn debugAssert(cheap_value_only_plz: bool) callconv(callconv_inline) void {
     if (comptime !Environment.isDebug) {
         return;
     }
diff --git a/src/string_builder.zig b/src/string_builder.zig
index 4dd4be99df..e5e3d0bd47 100644
--- a/src/string_builder.zig
+++ b/src/string_builder.zig
@@ -67,7 +67,7 @@ pub fn append16(this: *StringBuilder, slice: []const u16, fallback_allocator: st
         return buf[0..result.count :0];
     } else {
         var list = std.ArrayList(u8).init(fallback_allocator);
-        var out = bun.strings.toUTF8ListWithTypeBun(&list, []const u16, slice) catch return null;
+        var out = bun.strings.toUTF8ListWithTypeBun(&list, []const u16, slice, false) catch return null;
         out.append(0) catch return null;
         return list.items[0 .. list.items.len - 1 :0];
     }
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index ba6211d3cd..de502e4655 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1614,13 +1614,18 @@ pub fn toUTF16AllocNoTrim(allocator: std.mem.Allocator, bytes: []const u8, compt
 }
 
 pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement {
-    const c0 = @as(u21, input[0]);
+    return utf16CodepointWithFFFDAndFirstInputChar(Type, input[0], input);
+}
+
+fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.Elem(Type), input: Type) UTF16Replacement {
+    const c0 = @as(u21, char);
 
     if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
         // surrogate pair
         if (input.len == 1)
             return .{
                 .len = 1,
+                .is_lead = true,
             };
         //error.DanglingSurrogateHalf;
         const c1 = @as(u21, input[1]);
@@ -1634,6 +1639,7 @@ pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement
                     .fail = true,
                     .len = 1,
                     .code_point = unicode_replacement,
+                    .is_lead = true,
                 };
             };
         // return error.ExpectedSecondSurrogateHalf;
@@ -1862,7 +1868,7 @@ pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16:
     );
     if (result.status == .surrogate) {
         // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
-        return toUTF8ListWithTypeBun(&list, Type, utf16);
+        return toUTF8ListWithTypeBun(&list, Type, utf16, false);
     }
 
     list.items.len = result.count;
@@ -1877,7 +1883,7 @@ pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !v
 
     if (result.status == .surrogate) {
         // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
-        _ = try toUTF8ListWithTypeBun(list, []const u16, utf16);
+        _ = try toUTF8ListWithTypeBun(list, []const u16, utf16, false);
         return;
     }
 
@@ -1951,14 +1957,15 @@ pub fn toUTF8FromLatin1Z(allocator: std.mem.Allocator, latin1: []const u8) !?std
     return list1;
 }
 
-pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
+pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf16: Type, comptime skip_trailing_replacement: bool) !(if (skip_trailing_replacement) ?u16 else std.ArrayList(u8)) {
     var utf16_remaining = utf16;
 
     while (firstNonASCII16(Type, utf16_remaining)) |i| {
         const to_copy = utf16_remaining[0..i];
         utf16_remaining = utf16_remaining[i..];
+        const token = utf16_remaining[0];
 
-        const replacement = utf16CodepointWithFFFD(Type, utf16_remaining);
+        const replacement = utf16CodepointWithFFFDAndFirstInputChar(Type, token, utf16_remaining);
         utf16_remaining = utf16_remaining[replacement.len..];
 
         const count: usize = replacement.utf8Width();
@@ -1975,8 +1982,13 @@ pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf1
             to_copy,
         );
 
-        list.items.len += count;
+        if (comptime skip_trailing_replacement) {
+            if (replacement.is_lead and utf16_remaining.len == 0) {
+                return token;
+            }
+        }
 
+        list.items.len += count;
         _ = encodeWTF8RuneT(
             list.items.ptr[list.items.len - count .. list.items.len - count + 4][0..4],
             u32,
@@ -1993,6 +2005,9 @@ pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf1
 
     log("UTF16 {d} -> {d} UTF8", .{ utf16.len, list.items.len });
 
+    if (comptime skip_trailing_replacement) {
+        return null;
+    }
     return list.*;
 }
 
@@ -2140,6 +2155,7 @@ pub const UTF16Replacement = struct {
     fail: bool = false,
 
     can_buffer: bool = true,
+    is_lead: bool = false,
 
     pub inline fn utf8Width(replacement: UTF16Replacement) u3 {
         return switch (replacement.code_point) {