From 7e906c1cae0181dbf6c1ec293df63b6fea841cf4 Mon Sep 17 00:00:00 2001
From: Jarred Sumner <jarred@jarredsumner.com>
Date: Thu, 22 Feb 2024 20:04:50 -0800
Subject: [PATCH] Remove ignore min branch (#9061)

Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
---
 src/bun.js/webcore/encoding.zig |  2 +-
 src/string.zig                  |  2 +-
 src/string_immutable.zig        | 72 ++++++++++++---------------------
 3 files changed, 28 insertions(+), 48 deletions(-)

diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index e9954fc154..a7415794ca 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -527,7 +527,7 @@ pub const TextDecoder = struct {
         while (remainder.len > 0) {
             switch (remainder[0]) {
                 0...127 => {
-                    const count: usize = if (strings.firstNonASCII16IgnoreMin(Slice, remainder)) |index| index + 1 else remainder.len;
+                    const count: usize = if (strings.firstNonASCII16(Slice, remainder)) |index| index + 1 else remainder.len;
 
                     buffer.ensureUnusedCapacity(allocator, count) catch unreachable;
 
diff --git a/src/string.zig b/src/string.zig
index 5645dad8d2..8795f788c7 100644
--- a/src/string.zig
+++ b/src/string.zig
@@ -370,7 +370,7 @@ pub const String = extern struct {
 
     pub fn createUTF16(bytes: []const u16) String {
         if (bytes.len == 0) return String.empty;
-        if (bun.strings.firstNonASCII16IgnoreMin([]const u16, bytes) == null) {
+        if (bun.strings.firstNonASCII16([]const u16, bytes) == null) {
             return BunString__fromUTF16ToLatin1(bytes.ptr, bytes.len);
         }
         return BunString__fromUTF16(bytes.ptr, bytes.len);
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 4311101fd6..4170cf8163 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -4205,14 +4205,6 @@ pub fn trimLeadingChar(slice: []const u8, char: u8) []const u8 {
     return "";
 }
 
-pub fn firstNonASCII16(comptime Slice: type, slice: Slice) ?u32 {
-    return firstNonASCII16CheckMin(Slice, slice, true);
-}
-
-pub fn firstNonASCII16IgnoreMin(comptime Slice: type, slice: Slice) ?u32 {
-    return firstNonASCII16CheckMin(Slice, slice, false);
-}
-
 /// Get the line number and the byte offsets of `line_range_count` above the desired line number
 /// The final element is the end index of the desired line
 const LineRange = struct {
@@ -4354,7 +4346,7 @@ pub fn getLinesInText(text: []const u8, line: u32, comptime line_range_count: us
     return results;
 }
 
-pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime check_min: bool) ?u32 {
+pub fn firstNonASCII16(comptime Slice: type, slice: Slice) ?u32 {
     var remaining = slice;
     const remaining_start = remaining.ptr;
 
@@ -4365,56 +4357,44 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
                 const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
                 const max_value = @reduce(.Max, vec);
 
-                if (comptime check_min) {
-                    // by using @reduce here, we make it only do one comparison
-                    // @reduce doesn't tell us the index though
-                    const min_value = @reduce(.Min, vec);
-                    if (min_value < 0x20 or max_value > 127) {
-                        remaining.len -= (@intFromPtr(remaining.ptr) - @intFromPtr(remaining_start)) / 2;
+                if (max_value > 127) {
+                    const cmp = vec > max_u16_ascii;
+                    const bitmask: u8 = @as(u8, @bitCast(cmp));
+                    const index_of_first_nonascii_in_vector = @ctz(bitmask);
 
-                        // this is really slow
-                        // it does it element-wise for every single u8 on the vector
-                        // instead of doing the SIMD instructions
-                        // it removes a loop, but probably is slower in the end
-                        const cmp = @as(AsciiVectorU16U1, @bitCast(vec > max_u16_ascii)) | @as(AsciiVectorU16U1, @bitCast(vec < min_u16_ascii));
-                        const bitmask: u8 = @as(u8, @bitCast(cmp));
-                        const first = @ctz(bitmask);
+                    const offset_of_vector_in_input = (@intFromPtr(remaining.ptr) - @intFromPtr(remaining_start)) / 2;
+                    const out: u32 = @intCast(offset_of_vector_in_input + index_of_first_nonascii_in_vector);
 
-                        return @as(u32, @intCast(@as(u32, first) + @as(u32, @intCast(slice.len - remaining.len))));
+                    if (comptime Environment.isDebug) {
+                        for (0..index_of_first_nonascii_in_vector) |i| {
+                            if (vec[i] > 127) {
+                                bun.Output.panic("firstNonASCII16: found non-ASCII character in ASCII vector before the first non-ASCII character", .{});
+                            }
+                        }
+
+                        if (remaining[out] <= 127) {
+                            bun.Output.panic("firstNonASCII16: Expected non-ascii character", .{});
+                        }
                     }
-                } else {
-                    if (max_value > 127) {
-                        const cmp = vec > max_u16_ascii;
-                        const bitmask: u8 = @as(u8, @bitCast(cmp));
-                        const index_of_first_nonascii_in_vector = @ctz(bitmask);
 
-                        const offset_of_vector_in_input = (@intFromPtr(remaining.ptr) - @intFromPtr(remaining_start)) / 2;
-
-                        return @intCast(offset_of_vector_in_input + index_of_first_nonascii_in_vector);
-                    }
+                    return out;
                 }
 
                 remaining.ptr += ascii_u16_vector_size;
             }
             remaining.len -= (@intFromPtr(remaining.ptr) - @intFromPtr(remaining_start)) / 2;
         }
+
+        std.debug.assert(remaining.len < ascii_u16_vector_size);
     }
 
     var i: usize = (@intFromPtr(remaining.ptr) - @intFromPtr(remaining_start)) / 2;
-    if (comptime check_min) {
-        for (remaining) |char| {
-            if (char > 127 or char < 0x20) {
-                return @as(u32, @truncate(i));
-            }
-            i += 1;
-        }
-    } else {
-        for (remaining) |char| {
-            if (char > 127) {
-                return @as(u32, @truncate(i));
-            }
-            i += 1;
+
+    for (remaining) |char| {
+        if (char > 127) {
+            return @truncate(i);
         }
+        i += 1;
     }
 
     return null;
@@ -6155,7 +6135,7 @@ pub const visible = struct {
 
         while (true) {
             {
-                const idx = firstNonASCII16IgnoreMin([]const u16, input) orelse input.len;
+                const idx = firstNonASCII16([]const u16, input) orelse input.len;
                 for (0..idx) |j| {
                     const cp = input[j];
                     defer prev = cp;