fix writing UTF-16 with a trailing unpaired surrogate to process.stdout/stderr (#23444)

### What does this PR do? Fixes `bun -p "process.stderr.write('Hello' + String.fromCharCode(0xd800))"`. Also fixes potential index out of bounds if there are many invalid sequences. This also affects `TextEncoder`. ### How did you verify your code works? Added tests for edgecases --------- Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
2026-02-12 03:48:56 +00:00 · 2025-10-10 03:48:04 -07:00
parent 8826b4f5f5
commit 312a86fd43
30 changed files with 765 additions and 253 deletions
--- a/src/string/immutable/unicode.zig
+++ b/src/string/immutable/unicode.zig
@@ -258,26 +258,26 @@ pub fn codepointSize(comptime R: type, r: R) u3_fast {
    };
 }

-pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) OOM!std.ArrayList(u8) {
+pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), utf16: []const u16) OOM!std.ArrayList(u8) {
    var list = list_;
    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
        utf16,
-        list.items.ptr[0..list.capacity],
+        list.allocatedSlice(),
    );
    if (result.status == .surrogate) {
        // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
-        return toUTF8ListWithTypeBun(&list, Type, utf16, false);
+        return toUTF8ListWithTypeBun(&list, utf16, false);
    }

    list.items.len = result.count;
    return list;
 }

-pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
+pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), utf16: []const u16) error{SurrogatePair}!std.ArrayList(u8) {
    var list = list_;
    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
        utf16,
-        list.items.ptr[0..list.capacity],
+        list.allocatedSlice(),
    );
    if (result.status == .surrogate) {
        return error.SurrogatePair;
@@ -287,55 +287,55 @@ pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8),
    return list;
 }

-pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !void {
+pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) OOM!void {
    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
        utf16,
-        list.items.ptr[list.items.len..list.capacity],
+        list.unusedCapacitySlice(),
    );

    if (result.status == .surrogate) {
        // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
-        _ = try toUTF8ListWithTypeBun(list, []const u16, utf16, false);
+        _ = try toUTF8ListWithTypeBun(list, utf16, false);
        return;
    }

    list.items.len += result.count;
 }

-pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
-    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
+pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, utf16: []const u16) OOM![]u8 {
+    if (bun.FeatureFlags.use_simdutf) {
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // add 16 bytes of padding for SIMDUTF
        var list = try std.ArrayList(u8).initCapacity(allocator, length + 16);
-        list = try convertUTF16ToUTF8(list, Type, utf16);
+        list = try convertUTF16ToUTF8(list, utf16);
        return list.items;
    }

    var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
-    list = try toUTF8ListWithType(list, Type, utf16);
+    list = try toUTF8ListWithType(list, utf16);
    return list.items;
 }

-pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) OOM![]u8 {
-    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
+pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, utf16: []const u16) OOM![]u8 {
+    if (bun.FeatureFlags.use_simdutf) {
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // add 16 bytes of padding for SIMDUTF
        var list = try std.ArrayList(u8).initCapacity(allocator, length + 16);
-        list = try convertUTF16ToUTF8(list, Type, utf16);
+        list = try convertUTF16ToUTF8(list, utf16);
        return list.items;
    }

    var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
-    list = try toUTF8ListWithType(list, Type, utf16);
+    list = try toUTF8ListWithType(list, utf16);
    return list.items;
 }

-pub fn toUTF8ListWithType(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) OOM!std.ArrayList(u8) {
-    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
+pub fn toUTF8ListWithType(list_: std.ArrayList(u8), utf16: []const u16) OOM!std.ArrayList(u8) {
+    if (bun.FeatureFlags.use_simdutf) {
        var list = list_;
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        try list.ensureTotalCapacityPrecise(length + 16);
-        const buf = try convertUTF16ToUTF8(list, Type, utf16);
+        const buf = try convertUTF16ToUTF8(list, utf16);

        // Commenting out because `convertUTF16ToUTF8` may convert to WTF-8
        // which uses 3 bytes for invalid surrogates, causing the length to not
@@ -364,7 +364,7 @@ pub fn toUTF8FromLatin1(allocator: std.mem.Allocator, latin1: []const u8) !?std.
        return null;

    const list = try std.ArrayList(u8).initCapacity(allocator, latin1.len);
-    return try allocateLatin1IntoUTF8WithList(list, 0, []const u8, latin1);
+    return try allocateLatin1IntoUTF8WithList(list, 0, latin1);
 }

 pub fn toUTF8FromLatin1Z(allocator: std.mem.Allocator, latin1: []const u8) !?std.ArrayList(u8) {
@@ -372,20 +372,20 @@ pub fn toUTF8FromLatin1Z(allocator: std.mem.Allocator, latin1: []const u8) !?std
        return null;

    const list = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 1);
-    var list1 = try allocateLatin1IntoUTF8WithList(list, 0, []const u8, latin1);
+    var list1 = try allocateLatin1IntoUTF8WithList(list, 0, latin1);
    try list1.append(0);
    return list1;
 }

-pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf16: Type, comptime skip_trailing_replacement: bool) OOM!(if (skip_trailing_replacement) ?u16 else std.ArrayList(u8)) {
+pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), utf16: []const u16, comptime skip_trailing_replacement: bool) OOM!(if (skip_trailing_replacement) ?u16 else std.ArrayList(u8)) {
    var utf16_remaining = utf16;

-    while (firstNonASCII16(Type, utf16_remaining)) |i| {
+    while (firstNonASCII16(utf16_remaining)) |i| {
        const to_copy = utf16_remaining[0..i];
        utf16_remaining = utf16_remaining[i..];
        const token = utf16_remaining[0];

-        const replacement = utf16CodepointWithFFFDAndFirstInputChar(Type, token, utf16_remaining);
+        const replacement = utf16CodepointWithFFFDAndFirstInputChar(token, utf16_remaining);
        utf16_remaining = utf16_remaining[replacement.len..];

        const count: usize = replacement.utf8Width();
@@ -433,7 +433,7 @@ pub const EncodeIntoResult = struct {
    /// The number of u8s we wrote to the utf-8 buffer
    written: u32 = 0,
 };
-pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type, latin1_: Type) ![]u8 {
+pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, latin1_: []const u8) ![]u8 {
    if (comptime bun.FeatureFlags.latin1_is_now_ascii) {
        var out = try allocator.alloc(u8, latin1_.len);
        @memcpy(out[0..latin1_.len], latin1_);
@@ -441,11 +441,11 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
    }

    const list = try std.ArrayList(u8).initCapacity(allocator, latin1_.len);
-    var foo = try allocateLatin1IntoUTF8WithList(list, 0, Type, latin1_);
+    var foo = try allocateLatin1IntoUTF8WithList(list, 0, latin1_);
    return try foo.toOwnedSlice();
 }

-pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list: usize, comptime Type: type, latin1_: Type) OOM!std.ArrayList(u8) {
+pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list: usize, latin1_: []const u8) OOM!std.ArrayList(u8) {
    var latin1 = latin1_;
    var i: usize = offset_into_list;
    var list = list_;
@@ -688,11 +688,11 @@ pub fn convertUTF8BytesIntoUTF16(bytes: []const u8) UTF16Replacement {
    return convertUTF8BytesIntoUTF16WithLength(&sequence, sequence_length, bytes.len);
 }

-pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) EncodeIntoResult {
-    return copyLatin1IntoUTF8StopOnNonASCII(buf_, Type, latin1_, false);
+pub fn copyLatin1IntoUTF8(buf_: []u8, latin1_: []const u8) EncodeIntoResult {
+    return copyLatin1IntoUTF8StopOnNonASCII(buf_, latin1_, false);
 }

-pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_: Type, comptime stop: bool) EncodeIntoResult {
+pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, latin1_: []const u8, comptime stop: bool) EncodeIntoResult {
    if (comptime bun.FeatureFlags.latin1_is_now_ascii) {
        const to_copy = @as(u32, @truncate(@min(buf_.len, latin1_.len)));
        @memcpy(buf_[0..to_copy], latin1_[0..to_copy]);
@@ -839,16 +839,12 @@ pub fn elementLengthLatin1IntoUTF8(slice: []const u8) usize {
    return bun.simdutf.length.utf8.from.latin1(slice);
 }

-pub fn copyCP1252IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
+pub fn copyCP1252IntoUTF16(buf_: []u16, latin1_: []const u8) EncodeIntoResult {
    var buf = buf_;
    var latin1 = latin1_;
    while (buf.len > 0 and latin1.len > 0) {
        const to_write = strings.firstNonASCII(latin1) orelse @as(u32, @truncate(@min(latin1.len, buf.len)));
-        if (comptime std.meta.alignment(Buffer) != @alignOf(u16)) {
-            strings.copyU8IntoU16WithAlignment(std.meta.alignment(Buffer), buf, latin1[0..to_write]);
-        } else {
-            strings.copyU8IntoU16(buf, latin1[0..to_write]);
-        }
+        strings.copyU8IntoU16(buf, latin1[0..to_write]);

        latin1 = latin1[to_write..];
        buf = buf[to_write..];
@@ -865,13 +861,13 @@ pub fn copyCP1252IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: t
    };
 }

-pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
+pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, latin1_: []const u8) EncodeIntoResult {
    const len = @min(buf_.len, latin1_.len);
    for (buf_[0..len], latin1_[0..len]) |*out, in| out.* = in;
    return .{ .read = @as(u32, @truncate(len)), .written = @as(u32, @truncate(len)) };
 }

-pub fn elementLengthCP1252IntoUTF16(comptime Type: type, cp1252_: Type) usize {
+pub fn elementLengthCP1252IntoUTF16(cp1252_: []const u8) usize {
    // cp1252 is always at most 1 UTF-16 code unit long
    return cp1252_.len;
 }
@@ -885,7 +881,7 @@ pub fn eqlUtf16(comptime self: string, other: []const u16) bool {
 }

 pub fn toUTF8Alloc(allocator: std.mem.Allocator, js: []const u16) OOM![]u8 {
-    return try toUTF8AllocWithType(allocator, []const u16, js);
+    return try toUTF8AllocWithType(allocator, js);
 }

 pub fn toUTF8AllocZ(allocator: std.mem.Allocator, js: []const u16) OOM![:0]u8 {
@@ -924,55 +920,6 @@ pub fn copyU8IntoU16(output_: []u16, input_: []const u8) callconv(bun.callconv_i
    }
 }

-pub fn copyU8IntoU16WithAlignment(comptime alignment: u21, output_: []align(alignment) u16, input_: []const u8) void {
-    var output = output_;
-    var input = input_;
-    const word = @sizeOf(usize) / 2;
-    if (comptime Environment.allow_assert) assert(input.len <= output.len);
-
-    // un-aligned data access is slow
-    // so we attempt to align the data
-    while (!std.mem.isAligned(@intFromPtr(output.ptr), @alignOf(u16)) and input.len >= word) {
-        output[0] = input[0];
-        output = output[1..];
-        input = input[1..];
-    }
-
-    if (std.mem.isAligned(@intFromPtr(output.ptr), @alignOf(u16)) and input.len > 0) {
-        copyU8IntoU16(@as([*]u16, @alignCast(output.ptr))[0..output.len], input);
-        return;
-    }
-
-    for (input, 0..) |c, i| {
-        output[i] = c;
-    }
-}
-
-// pub fn copy(output_: []u8, input_: []const u8) callconv(bun.callconv_inline) void {
-//     var output = output_;
-//     var input = input_;
-//     if (comptime Environment.allow_assert) assert(input.len <= output.len);
-
-//     if (input.len > @sizeOf(usize) * 4) {
-//         comptime var i: usize = 0;
-//         inline while (i < 4) : (i += 1) {
-//             appendUTF8MachineWord(output[i * @sizeOf(usize) ..][0..@sizeOf(usize)], input[i * @sizeOf(usize) ..][0..@sizeOf(usize)]);
-//         }
-//         output = output[4 * @sizeOf(usize) ..];
-//         input = input[4 * @sizeOf(usize) ..];
-//     }
-
-//     while (input.len >= @sizeOf(usize)) {
-//         appendUTF8MachineWord(output[0..@sizeOf(usize)], input[0..@sizeOf(usize)]);
-//         output = output[@sizeOf(usize)..];
-//         input = input[@sizeOf(usize)..];
-//     }
-
-//     for (input) |c, i| {
-//         output[i] = c;
-//     }
-// }
-
 pub inline fn copyU16IntoU8(output: []u8, input: []align(1) const u16) void {
    if (comptime Environment.allow_assert) assert(input.len <= output.len);
    const count = @min(input.len, output.len);
@@ -1372,11 +1319,11 @@ pub fn toUTF16AllocMaybeBuffered(
    return .{ output.items, .{0} ** 3, 0 };
 }

-pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement {
-    return utf16CodepointWithFFFDAndFirstInputChar(Type, input[0], input);
+pub fn utf16CodepointWithFFFD(input: []const u16) UTF16Replacement {
+    return utf16CodepointWithFFFDAndFirstInputChar(input[0], input);
 }

-fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.Elem(Type), input: Type) UTF16Replacement {
+fn utf16CodepointWithFFFDAndFirstInputChar(char: u16, input: []const u16) UTF16Replacement {
    const c0 = @as(u21, char);

    if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
@@ -1412,7 +1359,7 @@ fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.E
    }
 }

-pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement {
+pub fn utf16Codepoint(input: []const u16) UTF16Replacement {
    const c0 = @as(u21, input[0]);

    if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
@@ -1686,34 +1633,28 @@ pub fn cp1252ToCodepointBytesAssumeNotASCII16(char: u32) u16 {
 /// Copy a UTF-16 string as UTF-8 into `buf`
 ///
 /// This may not encode everything if `buf` is not big enough.
-pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeIntoResult {
-    return copyUTF16IntoUTF8Impl(buf, Type, utf16, false);
+pub fn copyUTF16IntoUTF8(buf: []u8, utf16: []const u16) EncodeIntoResult {
+    return copyUTF16IntoUTF8Impl(buf, utf16, false);
 }

 /// See comment on `copyUTF16IntoUTF8WithBufferImpl` on what `allow_truncated_utf8_sequence` should do
-pub fn copyUTF16IntoUTF8Impl(buf: []u8, comptime Type: type, utf16: Type, comptime allow_truncated_utf8_sequence: bool) EncodeIntoResult {
-    if (comptime Type == []const u16) {
-        if (bun.FeatureFlags.use_simdutf) {
-            if (utf16.len == 0)
-                return .{ .read = 0, .written = 0 };
-            const trimmed = bun.simdutf.trim.utf16(utf16);
-            if (trimmed.len == 0)
-                return .{ .read = 0, .written = 0 };
+pub fn copyUTF16IntoUTF8Impl(buf: []u8, utf16: []const u16, comptime allow_truncated_utf8_sequence: bool) EncodeIntoResult {
+    if (bun.FeatureFlags.use_simdutf) {
+        if (utf16.len == 0)
+            return .{ .read = 0, .written = 0 };
+        const trimmed = bun.simdutf.trim.utf16(utf16);
+        if (trimmed.len == 0)
+            return .{ .read = 0, .written = 0 };

-            const out_len = if (buf.len <= (trimmed.len * 3 + 2))
-                bun.simdutf.length.utf8.from.utf16.le(trimmed)
-            else
-                buf.len;
+        const out_len = if (buf.len <= (trimmed.len * 3 + 2))
+            bun.simdutf.length.utf8.from.utf16.le(trimmed)
+        else
+            buf.len;

-            return copyUTF16IntoUTF8WithBufferImpl(buf, Type, utf16, trimmed, out_len, allow_truncated_utf8_sequence);
-        }
+        return copyUTF16IntoUTF8WithBufferImpl(buf, utf16, out_len, allow_truncated_utf8_sequence);
    }

-    return copyUTF16IntoUTF8WithBufferImpl(buf, Type, utf16, utf16, utf16.len, allow_truncated_utf8_sequence);
-}
-
-pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, trimmed: Type, out_len: usize) EncodeIntoResult {
-    return copyUTF16IntoUTF8WithBufferImpl(buf, Type, utf16, trimmed, out_len, false);
+    return copyUTF16IntoUTF8WithBufferImpl(buf, utf16, utf16.len, allow_truncated_utf8_sequence);
 }

 /// Q: What does the `allow_truncated_utf8_sequence` parameter do?
@@ -1731,29 +1672,27 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
 /// buffer.fill("\u0222");
 /// expect(buffer[0]).toBe(0xc8);
 /// ```
-pub fn copyUTF16IntoUTF8WithBufferImpl(buf: []u8, comptime Type: type, utf16: Type, trimmed: Type, out_len: usize, comptime allow_truncated_utf8_sequence: bool) EncodeIntoResult {
+pub fn copyUTF16IntoUTF8WithBufferImpl(buf: []u8, utf16: []const u16, out_len: usize, comptime allow_truncated_utf8_sequence: bool) EncodeIntoResult {
    var remaining = buf;
    var utf16_remaining = utf16;
    var ended_on_non_ascii = false;

    brk: {
-        if (comptime Type == []const u16) {
-            if (bun.FeatureFlags.use_simdutf) {
-                log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len });
-                if (remaining.len >= out_len) {
-                    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining);
-                    if (result.status == .surrogate) break :brk;
+        if (bun.FeatureFlags.use_simdutf) {
+            log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len });
+            if (remaining.len >= out_len) {
+                const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(utf16, remaining);
+                if (result.status == .surrogate) break :brk;

-                    return EncodeIntoResult{
-                        .read = @as(u32, @truncate(trimmed.len)),
-                        .written = @as(u32, @truncate(result.count)),
-                    };
-                }
+                return EncodeIntoResult{
+                    .read = @as(u32, @truncate(utf16.len)),
+                    .written = @as(u32, @truncate(result.count)),
+                };
            }
        }
    }

-    while (firstNonASCII16(Type, utf16_remaining)) |i| {
+    while (firstNonASCII16(utf16_remaining)) |i| {
        const end = @min(i, remaining.len);
        if (end > 0) copyU16IntoU8(remaining, utf16_remaining[0..end]);
        remaining = remaining[end..];
@@ -1762,7 +1701,7 @@ pub fn copyUTF16IntoUTF8WithBufferImpl(buf: []u8, comptime Type: type, utf16: Ty
        if (@min(utf16_remaining.len, remaining.len) == 0)
            break;

-        const replacement = utf16CodepointWithFFFD(Type, utf16_remaining);
+        const replacement = utf16CodepointWithFFFD(utf16_remaining);

        const width: usize = replacement.utf8Width();
        bun.assert(width > 1);
@@ -1836,7 +1775,7 @@ pub fn copyUTF16IntoUTF8WithBufferImpl(buf: []u8, comptime Type: type, utf16: Ty
    };
 }

-pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
+pub fn elementLengthUTF16IntoUTF8(utf16: []const u16) usize {
    if (bun.FeatureFlags.use_simdutf) {
        return bun.simdutf.length.utf8.from.utf16.le(utf16);
    }
@@ -1844,12 +1783,12 @@ pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
    var utf16_remaining = utf16;
    var count: usize = 0;

-    while (firstNonASCII16(Type, utf16_remaining)) |i| {
+    while (firstNonASCII16(utf16_remaining)) |i| {
        count += i;

        utf16_remaining = utf16_remaining[i..];

-        const replacement = utf16Codepoint(Type, utf16_remaining);
+        const replacement = utf16Codepoint(utf16_remaining);

        count += replacement.utf8Width();
        utf16_remaining = utf16_remaining[replacement.len..];
@@ -1858,7 +1797,7 @@ pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
    return count + utf16_remaining.len;
 }

-pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize {
+pub fn elementLengthUTF8IntoUTF16(utf8: []const u8) usize {
    var utf8_remaining = utf8;
    var count: usize = 0;

@@ -1871,7 +1810,7 @@ pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize {

        utf8_remaining = utf8_remaining[i..];

-        const replacement = utf16Codepoint(Type, utf8_remaining);
+        const replacement = utf16Codepoint(utf8_remaining);

        count += replacement.len;
        utf8_remaining = utf8_remaining[@min(replacement.utf8Width(), utf8_remaining.len)..];