Improve Bun.stringWidth accuracy and robustness (#25447)

This PR significantly improves `Bun.stringWidth` to handle a wider variety of Unicode characters and escape sequences correctly.

## Zero-width character handling

Added support for many previously unhandled zero-width characters:

- Soft hyphen (U+00AD)
- Word joiner and invisible operators (U+2060-U+2064)
- Lone surrogates (U+D800-U+DFFF)
- Arabic formatting characters (U+0600-U+0605, U+06DD, U+070F, U+08E2)
- Indic script combining marks (Devanagari through Malayalam)
- Thai and Lao combining marks
- Combining Diacritical Marks Extended and Supplement
- Tag characters (U+E0000-U+E007F)

## ANSI escape sequence handling

### CSI sequences

- Now properly handles ALL CSI final bytes (0x40-0x7E), not just `m`
- This means cursor movement (A/B/C/D), erase (J/K), scroll (S/T), and other CSI commands are now correctly excluded from width calculation

### OSC sequences

- Added support for OSC sequences (ESC ] ... BEL/ST)
- OSC 8 hyperlinks are now properly handled
- Supports both BEL (0x07) and ST (ESC \) terminators

### ESC ESC fix

- Fixed state machine bug where `ESC ESC` would incorrectly reset state
- Now correctly handles consecutive ESC characters

## Emoji handling

Added proper grapheme-aware emoji width calculation:

- Flag emoji (regional indicator pairs) → width 2
- Skin tone modifiers → width 2
- ZWJ sequences (family, professions, etc.) → width 2
- Keycap sequences → width 2
- Variation selectors (VS15 for text, VS16 for emoji presentation)
- Uses ICU's `UCHAR_EMOJI` property for accurate emoji detection

## Test coverage

Added comprehensive test suite with **94 tests** covering:

- All zero-width character categories
- All CSI final bytes
- OSC sequences with various terminators
- Emoji edge cases (flags, skin tones, ZWJ, keycaps, variation selectors)
- East Asian width (CJK, fullwidth, halfwidth katakana)
- Indic and Thai script combining marks
- Fuzzer-like stress tests for robustness

## Breaking changes

This is a behavior change - `stringWidth` will return different values for some inputs. However, the new values are more accurate representations of terminal display width:

| Input | Old | New | Why |
|-------|-----|-----|-----|
| Flag emoji 🇺🇸 | 1 | 2 | Flags display as 2 cells |
| Skin tone 👋🏽 | 4 | 2 | Emoji + modifier = 1 grapheme |
| ZWJ family 👨‍👩‍👧 | 8 | 2 | ZWJ sequence = 1 grapheme |
| Word joiner U+2060 | 1 | 0 | Invisible character |
| OSC 8 hyperlinks | counted URL | just visible text | URLs are invisible |
| Cursor movement ESC[5A | counted | 0 | Control sequence |

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Claude Bot <claude-bot@bun.sh>
2026-02-02 15:08:46 +00:00 · 2025-12-10 16:17:57 -08:00
parent ac0099ebc6
commit 98cee5a57e
3 changed files with 989 additions and 33 deletions
--- a/bench/bun.lock
+++ b/bench/bun.lock
@@ -1,5 +1,6 @@
 {
  "lockfileVersion": 1,
+  "configVersion": 0,
  "workspaces": {
    "": {
      "name": "bench",
--- a/src/string/immutable/visible.zig
+++ b/src/string/immutable/visible.zig
@@ -8,6 +8,10 @@ pub fn isZeroWidthCodepointType(comptime T: type, cp: T) bool {
        return true;
    }

+    // Soft hyphen (U+00AD) - invisible/zero-width
+    if (cp == 0xad)
+        return true;
+
    if (comptime @sizeOf(T) == 1) {
        return false;
    }
@@ -18,7 +22,12 @@ pub fn isZeroWidthCodepointType(comptime T: type, cp: T) bool {
    }

    if (cp >= 0x200b and cp <= 0x200f) {
-        // Modifying Invisible Characters
+        // Modifying Invisible Characters (ZWS, ZWNJ, ZWJ, LRM, RLM)
+        return true;
+    }
+
+    if (cp >= 0x2060 and cp <= 0x2064) {
+        // Word joiner (U+2060), invisible operators
        return true;
    }

@@ -37,8 +46,51 @@ pub fn isZeroWidthCodepointType(comptime T: type, cp: T) bool {
        // Zero Width No-Break Space (BOM, ZWNBSP)
        return true;

+    if (cp >= 0xd800 and cp <= 0xdfff)
+        // Surrogates (including lone surrogates)
+        return true;
+
+    // Arabic formatting characters
+    if ((cp >= 0x600 and cp <= 0x605) or cp == 0x6dd or cp == 0x70f or cp == 0x8e2)
+        return true;
+
+    // Indic script combining marks (Devanagari through Malayalam)
+    if (cp >= 0x900 and cp <= 0xd4f) {
+        const offset = cp & 0x7f;
+        // Signs at block start (except position 0x03 which is often a visible Visarga)
+        if (offset <= 0x02) return true;
+        // Vowel signs, virama (0x3a-0x4d), but exclude:
+        // - 0x3D (Avagraha - visible letter in most blocks)
+        if (offset >= 0x3a and offset <= 0x4d and offset != 0x3d) return true;
+        // Position 0x4E-0x4F are visible symbols in some blocks (e.g., Malayalam Sign Para)
+        // Stress signs (0x51-0x57)
+        if (offset >= 0x51 and offset <= 0x57) return true;
+        // Vowel signs (0x62-0x63)
+        if (offset >= 0x62 and offset <= 0x63) return true;
+    }
+
+    // Thai combining marks; U+0E32 (SARA AA) and U+0E33 (SARA AM) are spacing vowels and keep width 1
+    if (cp == 0xe31 or (cp >= 0xe34 and cp <= 0xe3a) or (cp >= 0xe47 and cp <= 0xe4e))
+        return true;
+
+    // Lao combining marks; U+0EB2 and U+0EB3 are spacing vowels (like Thai AA/AM) and keep width 1
+    if (cp == 0xeb1 or (cp >= 0xeb4 and cp <= 0xebc) or (cp >= 0xec8 and cp <= 0xecd))
+        return true;
+
+    // Combining Diacritical Marks Extended
+    if (cp >= 0x1ab0 and cp <= 0x1aff)
+        return true;
+
+    // Combining Diacritical Marks Supplement
+    if (cp >= 0x1dc0 and cp <= 0x1dff)
+        return true;
+
+    // Tag characters
+    if (cp >= 0xe0000 and cp <= 0xe007f)
+        return true;
+
    if (cp >= 0xe0100 and cp <= 0xe01ef)
-        // Variation Selectors
+        // Variation Selectors Supplement
        return true;

    return false;
@@ -643,7 +695,8 @@ pub const visible = struct {
    }

    fn visibleLatin1WidthScalar(c: u8) u1 {
-        return if ((c >= 127 and c <= 159) or c < 32) 0 else 1;
+        // Zero-width: control chars (0x00-0x1F, 0x7F-0x9F) and soft hyphen (0xAD)
+        return if ((c >= 127 and c <= 159) or c < 32 or c == 0xAD) 0 else 1;
    }

    fn visibleLatin1WidthExcludeANSIColors(input_: anytype) usize {
@@ -657,11 +710,35 @@ pub const visible = struct {
            length += visibleLatin1Width(input[0..i]);
            input = input[i..];

-            if (input.len < 3) return length;
+            if (input.len < 2) return length;

            if (input[1] == '[') {
-                const end = indexFn(input[2..], 'm') orelse return length;
-                input = input[end + 3 ..];
+                // CSI sequence: ESC [ <params> <final byte>
+                // Final byte is in range 0x40-0x7E (@ through ~)
+                if (input.len < 3) return length;
+                input = input[2..];
+                while (input.len > 0) {
+                    const c = input[0];
+                    input = input[1..];
+                    // Final byte terminates the sequence
+                    if (c >= 0x40 and c <= 0x7E) break;
+                }
+            } else if (input[1] == ']') {
+                // OSC sequence: ESC ] ... (BEL or ST)
+                // Find terminator: BEL (0x07) or ST (ESC \)
+                input = input[2..];
+                while (input.len > 0) {
+                    if (input[0] == 0x07) {
+                        // BEL terminator
+                        input = input[1..];
+                        break;
+                    } else if (input[0] == 0x1b and input.len > 1 and input[1] == '\\') {
+                        // ST terminator (ESC \)
+                        input = input[2..];
+                        break;
+                    }
+                    input = input[1..];
+                }
            } else {
                input = input[1..];
            }
@@ -702,84 +779,334 @@ pub const visible = struct {
        return len;
    }

+    /// Packed state for grapheme tracking - all small fields in one u32 (10 + 2 + 8 + 7 flag bits + 5 pad bits)
+    const PackedState = packed struct(u32) {
+        non_emoji_width: u10 = 0, // Accumulated width; a u10 tops out at 1023 and GraphemeState.add() saturates (+|=)
+        base_width: u2 = 0, // Width of first codepoint (0, 1, or 2)
+        count: u8 = 0, // Number of codepoints in grapheme (saturating at 255)
+        // Flags: each records whether the corresponding codepoint kind was seen in the current grapheme
+        emoji_base: bool = false,
+        keycap: bool = false,
+        regional_indicator: bool = false,
+        skin_tone: bool = false,
+        zwj: bool = false,
+        vs15: bool = false,
+        vs16: bool = false,
+        _pad: u5 = 0, // Unused; pads the struct out to exactly 32 bits
+    };
+
+    const GraphemeState = struct {
+        first_cp: u32 = 0, // First codepoint of the current grapheme (read by width() for the VS16 case)
+        last_cp: u32 = 0, // Most recently seen codepoint of the current grapheme
+        s: PackedState = .{}, // Width accumulators and sequence flags
+
+        inline fn reset(self: *GraphemeState, cp: u32, ambiguousAsWide: bool) void {
+            self.first_cp = cp;
+            self.last_cp = cp;
+
+            // Start a new grapheme at `cp`. Fast path for ASCII: all flags stay false, width is 1 for 0x20-0x7E else 0
+            if (cp < 0x80) {
+                const w: u2 = if (cp >= 0x20 and cp < 0x7F) 1 else 0;
+                self.s = .{ .count = 1, .base_width = w, .non_emoji_width = w };
+                return;
+            }
+
+            const w: u3_fast = if (!isZeroWidthCodepointType(u32, cp))
+                visibleCodepointWidthType(u32, cp, ambiguousAsWide)
+            else
+                0;
+
+            self.s = .{
+                .count = 1,
+                .base_width = @truncate(w),
+                .non_emoji_width = w,
+                .emoji_base = isEmojiBase(cp),
+                .keycap = cp == 0x20E3,
+                .regional_indicator = isRegionalIndicator(cp),
+                .skin_tone = isSkinToneModifier(cp),
+                .zwj = cp == 0x200D,
+            };
+        }
+
+        fn add(self: *GraphemeState, cp: u32, ambiguousAsWide: bool) void {
+            self.last_cp = cp; // Extend the current grapheme with `cp`; flags below are sticky (only ever set)
+            self.s.count +|= 1;
+            self.s.keycap = self.s.keycap or (cp == 0x20E3);
+            self.s.regional_indicator = self.s.regional_indicator or isRegionalIndicator(cp);
+            self.s.skin_tone = self.s.skin_tone or isSkinToneModifier(cp);
+            self.s.zwj = self.s.zwj or (cp == 0x200D);
+            self.s.vs15 = self.s.vs15 or (cp == 0xFE0E);
+            self.s.vs16 = self.s.vs16 or (cp == 0xFE0F);
+
+            if (!isZeroWidthCodepointType(u32, cp)) {
+                self.s.non_emoji_width +|= visibleCodepointWidthType(u32, cp, ambiguousAsWide); // saturating add
+            }
+        }
+
+        inline fn width(self: *const GraphemeState) usize {
+            const s = self.s;
+            if (s.count == 0) return 0;
+
+            // The checks below are ordered by priority; the first matching rule wins.
+            // Regional indicator pair (flag emoji) → width 2
+            if (s.regional_indicator and s.count >= 2) return 2;
+            // Keycap sequence → width 2
+            if (s.keycap) return 2;
+            // Single regional indicator → width 1
+            if (s.regional_indicator) return 1;
+            // Emoji with skin tone or ZWJ → width 2
+            if (s.emoji_base and (s.skin_tone or s.zwj)) return 2;
+
+            // Handle variation selectors
+            if (s.vs15 or s.vs16) {
+                if (s.base_width == 2) return 2;
+                if (s.vs16) {
+                    const cp = self.first_cp;
+                    if ((cp >= 0x30 and cp <= 0x39) or cp == 0x23 or cp == 0x2A) return 1; // digits, '#', '*'
+                    if (cp < 0x80) return 1; // any other ASCII base also stays narrow
+                    return 2;
+                }
+                return 1;
+            }
+
+            return s.non_emoji_width;
+        }
+
+        fn isEmojiBase(cp: u32) bool {
+            // Note: ASCII fast path is handled in reset(), so cp >= 0x80 here
+
+            // Fast path: codepoints below U+203C are never treated as an emoji base
+            if (cp < 0x203C) return false;
+
+            // Fast path: U+2C00..U+1EFFF is treated as non-emoji without consulting ICU (range extends past the BMP)
+            if (cp >= 0x2C00 and cp < 0x1F000) return false;
+
+            // Defensive only: VS15/VS16 (U+FE0E/U+FE0F) and ZWJ (U+200D) are already rejected by the range checks above
+            if (cp == 0xFE0E or cp == 0xFE0F or cp == 0x200D) return false;
+
+            // Use ICU for accurate emoji detection
+            // UCHAR_EMOJI = 57
+            return icu_hasBinaryProperty(cp, 57);
+        }
+
+        fn isRegionalIndicator(cp: u32) bool {
+            return cp >= 0x1F1E6 and cp <= 0x1F1FF;
+        }
+
+        fn isSkinToneModifier(cp: u32) bool {
+            return cp >= 0x1F3FB and cp <= 0x1F3FF;
+        }
+    };
+
+    /// Count printable ASCII characters (0x20-0x7E) in a UTF-16 slice, 8 code units at a time via @Vector
+    fn countPrintableAscii16(input: []const u16) usize {
+        var total: usize = 0;
+        var remaining = input;
+
+        // Process 8 u16 values at a time using SIMD
+        const vec_len = 8;
+        while (remaining.len >= vec_len) {
+            const chunk: @Vector(vec_len, u16) = remaining[0..vec_len].*;
+            const low: @Vector(vec_len, u16) = @splat(0x20);
+            const high: @Vector(vec_len, u16) = @splat(0x7F);
+            const ge_low = chunk >= low;
+            const lt_high = chunk < high;
+            const printable = @select(bool, ge_low, lt_high, @as(@Vector(vec_len, bool), @splat(false))); // lane-wise (>= 0x20) and (< 0x7F)
+            total += @popCount(@as(u8, @bitCast(printable))); // 8 bool lanes -> u8 bitmask -> number of set bits
+            remaining = remaining[vec_len..];
+        }
+
+        // Handle the remaining (fewer than 8) elements one at a time with the same range test
+        for (remaining) |c| {
+            total += @intFromBool(c >= 0x20 and c < 0x7F);
+        }
+
+        return total;
+    }
+
    fn visibleUTF16WidthFn(input_: []const u16, exclude_ansi_colors: bool, ambiguousAsWide: bool) usize {
        var input = input_;
        var len: usize = 0;
-        var prev: ?u21 = 0;
+        var prev: ?u32 = null;
        var break_state = grapheme.BreakState{};
-        var break_start: u21 = 0;
+        var grapheme_state = GraphemeState{};
        var saw_1b = false;
-        var saw_bracket = false;
+        var saw_csi = false; // CSI: ESC [
+        var saw_osc = false; // OSC: ESC ]
        var stretch_len: usize = 0;

        while (true) {
            {
                const idx = firstNonASCII16(input) orelse input.len;
+
+                // Fast path: bulk ASCII processing when not in escape sequence
+                // ASCII chars are always their own graphemes, so we can count directly
+                if (idx > 0 and !saw_1b and !saw_csi and !saw_osc) {
+                    // Find how much we can bulk process
+                    // If stripping ANSI, stop at first ESC; otherwise process entire run
+                    const bulk_end = if (exclude_ansi_colors)
+                        strings.indexOfChar16Usize(input[0..idx], 0x1b) orelse idx
+                    else
+                        idx;
+
+                    if (bulk_end > 0) {
+                        // Flush any pending grapheme from previous non-ASCII
+                        if (grapheme_state.s.count > 0) {
+                            len += grapheme_state.width();
+                        }
+
+                        // Count all but last char in bulk using SIMD
+                        // Last char goes into grapheme_state in case combining mark follows
+                        if (bulk_end > 1) {
+                            len += countPrintableAscii16(input[0 .. bulk_end - 1]);
+                        }
+
+                        // Last char before ESC (or end) uses reset()
+                        const last_cp: u32 = input[bulk_end - 1];
+                        grapheme_state.reset(last_cp, ambiguousAsWide);
+                        prev = last_cp;
+                        break_state = grapheme.BreakState{};
+
+                        // If we consumed everything, advance and continue
+                        if (bulk_end == idx) {
+                            input = input[idx..];
+                            continue;
+                        }
+
+                        // Otherwise we hit ESC - start escape sequence handling
+                        saw_1b = true;
+                        prev = 0x1b;
+                        input = input[bulk_end + 1 ..];
+                        continue;
+                    }
+                }
+
                for (0..idx) |j| {
-                    const cp = input[j];
+                    const cp: u32 = input[j];
                    defer prev = cp;

-                    if (saw_bracket) {
-                        if (cp == 'm') {
+                    if (saw_osc) {
+                        // In OSC sequence, look for BEL (0x07) or ST (ESC \)
+                        if (cp == 0x07) {
                            saw_1b = false;
-                            saw_bracket = false;
+                            saw_osc = false;
                            stretch_len = 0;
                            continue;
+                        } else if (cp == '\\' and prev == 0x1b) {
+                            // ST terminator complete (ESC \)
+                            saw_1b = false;
+                            saw_osc = false;
+                            stretch_len = 0;
+                            continue;
+                        } else if (cp == 0x1b) {
+                            // ESC inside OSC - might be start of ST terminator
+                            // Don't exit OSC yet, wait to see if next char is '\'
+                            continue;
                        }
                        stretch_len += visibleCodepointWidth(cp, ambiguousAsWide);
                        continue;
                    }
-                    if (saw_1b) {
-                        if (cp == '[') {
-                            saw_bracket = true;
+                    if (saw_csi) {
+                        // CSI final byte is in range 0x40-0x7E (@ through ~)
+                        if (cp >= 0x40 and cp <= 0x7E) {
+                            saw_1b = false;
+                            saw_csi = false;
                            stretch_len = 0;
                            continue;
                        }
+                        // Parameter bytes - don't add to width
+                        continue;
+                    }
+                    if (saw_1b) {
+                        if (cp == '[') {
+                            saw_csi = true;
+                            stretch_len = 0;
+                            continue;
+                        } else if (cp == ']') {
+                            saw_osc = true;
+                            stretch_len = 0;
+                            continue;
+                        } else if (cp == 0x1b) {
+                            // Another ESC - this one starts a new potential sequence
+                            // Keep saw_1b = true, don't add width (ESC is control char anyway)
+                            continue;
+                        }
                        len += visibleCodepointWidth(cp, ambiguousAsWide);
+                        saw_1b = false;
                        continue;
                    }
                    if (!exclude_ansi_colors or cp != 0x1b) {
                        if (prev) |prev_| {
-                            const should_break = grapheme.graphemeBreak(prev_, cp, &break_state);
+                            const should_break = grapheme.graphemeBreak(@truncate(prev_), @truncate(cp), &break_state);
                            if (should_break) {
-                                len += visibleCodepointWidthMaybeEmoji(break_start, cp == 0xFE0F, ambiguousAsWide);
-                                break_start = cp;
+                                len += grapheme_state.width();
+                                grapheme_state.reset(@truncate(cp), ambiguousAsWide);
                            } else {
-                                //
+                                grapheme_state.add(cp, ambiguousAsWide);
                            }
                        } else {
-                            len += visibleCodepointWidth(cp, ambiguousAsWide);
-                            break_start = cp;
+                            grapheme_state.reset(@truncate(cp), ambiguousAsWide);
                        }
                        continue;
                    }
                    saw_1b = true;
                    continue;
                }
+                // Only add stretch_len if we completed the escape sequence
+                // (unterminated sequences should not contribute to width)
+                if (!saw_csi and !saw_osc) {
                    len += stretch_len;
+                }
+                stretch_len = 0;
                input = input[idx..];
            }
            if (input.len == 0) break;
            const replacement = utf16CodepointWithFFFD(input);
            defer input = input[replacement.len..];
-            if (replacement.fail) continue;
-            const cp: u21 = @intCast(replacement.code_point);
+            // Skip invalid sequences and lone surrogates (treat as zero-width)
+            if (replacement.fail or replacement.is_lead) continue;
+            const cp: u32 = @intCast(replacement.code_point);
            defer prev = cp;

+            // Handle non-ASCII characters inside escape sequences
+            if (saw_osc) {
+                // In OSC sequence, look for BEL (0x07) or ST (ESC \)
+                // Non-ASCII chars inside OSC should not contribute to width
+                if (cp == 0x07) {
+                    saw_1b = false;
+                    saw_osc = false;
+                    stretch_len = 0;
+                }
+                // Note: ST (ESC \) only uses ASCII chars, so we don't need to check here
+                continue;
+            }
+            if (saw_csi) {
+                // CSI sequences should only contain ASCII parameters and final bytes
+                // Non-ASCII char ends the CSI sequence abnormally - don't count it
+                saw_1b = false;
+                saw_csi = false;
+                stretch_len = 0;
+                continue;
+            }
+            if (saw_1b) {
+                // ESC followed by non-ASCII - not a valid sequence start
+                saw_1b = false;
+                // Don't count this char as part of escape, treat normally below
+            }
+
            if (prev) |prev_| {
-                const should_break = grapheme.graphemeBreak(prev_, cp, &break_state);
+                const should_break = grapheme.graphemeBreak(@truncate(prev_), @truncate(cp), &break_state);
                if (should_break) {
-                    len += visibleCodepointWidthMaybeEmoji(break_start, cp == 0xFE0F, ambiguousAsWide);
-                    break_start = cp;
+                    len += grapheme_state.width();
+                    grapheme_state.reset(cp, ambiguousAsWide);
+                } else {
+                    grapheme_state.add(cp, ambiguousAsWide);
                }
            } else {
-                len += visibleCodepointWidth(cp, ambiguousAsWide);
-                break_start = cp;
+                grapheme_state.reset(cp, ambiguousAsWide);
            }
        }
-        if (break_start > 0) {
-            len += visibleCodepointWidthMaybeEmoji(break_start, (prev orelse 0) == 0xFE0F, ambiguousAsWide);
-        }
+        // Add width of final grapheme
+        len += grapheme_state.width();
        return len;
    }

--- a/test/js/bun/util/stringWidth.test.ts
+++ b/test/js/bun/util/stringWidth.test.ts
@@ -149,3 +149,631 @@ for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"
    expect("👨‍❤️‍💋‍👨")[matcher]();
  });
 }
+
+// ============================================================================
+// Extended tests for stringWidth edge cases
+// These test exact expected values rather than comparing to npm string-width
+// ============================================================================
+
+describe("stringWidth extended", () => {
+  describe("zero-width characters", () => {
+    test("soft hyphen (U+00AD)", () => {
+      expect(Bun.stringWidth("\u00AD")).toBe(0);
+      expect(Bun.stringWidth("a\u00ADb")).toBe(2);
+      expect(Bun.stringWidth("\u00AD\u00AD\u00AD")).toBe(0);
+    });
+
+    test("word joiner and invisible operators (U+2060-U+2064)", () => {
+      expect(Bun.stringWidth("\u2060")).toBe(0); // Word joiner
+      expect(Bun.stringWidth("\u2061")).toBe(0); // Function application
+      expect(Bun.stringWidth("\u2062")).toBe(0); // Invisible times
+      expect(Bun.stringWidth("\u2063")).toBe(0); // Invisible separator
+      expect(Bun.stringWidth("\u2064")).toBe(0); // Invisible plus
+      expect(Bun.stringWidth("a\u2060b")).toBe(2);
+    });
+
+    test("zero-width space/joiner/non-joiner (U+200B-U+200D)", () => {
+      expect(Bun.stringWidth("\u200B")).toBe(0); // Zero-width space
+      expect(Bun.stringWidth("\u200C")).toBe(0); // Zero-width non-joiner
+      expect(Bun.stringWidth("\u200D")).toBe(0); // Zero-width joiner
+      expect(Bun.stringWidth("a\u200Bb\u200Cc\u200Dd")).toBe(4);
+    });
+
+    test("LRM and RLM (U+200E-U+200F)", () => {
+      expect(Bun.stringWidth("\u200E")).toBe(0); // Left-to-right mark
+      expect(Bun.stringWidth("\u200F")).toBe(0); // Right-to-left mark
+      expect(Bun.stringWidth("a\u200Eb\u200Fc")).toBe(3);
+    });
+
+    test("BOM / ZWNBSP (U+FEFF)", () => {
+      expect(Bun.stringWidth("\uFEFF")).toBe(0);
+      expect(Bun.stringWidth("\uFEFFhello")).toBe(5);
+    });
+
+    test("Arabic formatting characters", () => {
+      expect(Bun.stringWidth("\u0600")).toBe(0); // Arabic number sign
+      expect(Bun.stringWidth("\u0601")).toBe(0); // Arabic sign sanah
+      expect(Bun.stringWidth("\u0602")).toBe(0); // Arabic footnote marker
+      expect(Bun.stringWidth("\u0603")).toBe(0); // Arabic sign safha
+      expect(Bun.stringWidth("\u0604")).toBe(0); // Arabic sign samvat
+      expect(Bun.stringWidth("\u0605")).toBe(0); // Arabic number mark above
+      expect(Bun.stringWidth("\u06DD")).toBe(0); // Arabic end of ayah
+      expect(Bun.stringWidth("\u070F")).toBe(0); // Syriac abbreviation mark
+      expect(Bun.stringWidth("\u08E2")).toBe(0); // Arabic disputed end of ayah
+      expect(Bun.stringWidth("\u0600hello")).toBe(5);
+    });
+
+    test("variation selectors (U+FE00-U+FE0F)", () => {
+      expect(Bun.stringWidth("\uFE00")).toBe(0);
+      expect(Bun.stringWidth("\uFE0E")).toBe(0); // VS15 (text)
+      expect(Bun.stringWidth("\uFE0F")).toBe(0); // VS16 (emoji)
+    });
+
+    test("tag characters (U+E0000-U+E007F)", () => {
+      expect(Bun.stringWidth("\u{E0001}")).toBe(0); // Language tag
+      expect(Bun.stringWidth("\u{E0020}")).toBe(0); // Tag space
+      expect(Bun.stringWidth("\u{E007F}")).toBe(0); // Cancel tag
+    });
+
+    test("lone surrogates", () => {
+      expect(Bun.stringWidth("\uD800")).toBe(0); // High surrogate
+      expect(Bun.stringWidth("\uDBFF")).toBe(0); // High surrogate
+      expect(Bun.stringWidth("\uDC00")).toBe(0); // Low surrogate
+      expect(Bun.stringWidth("\uDFFF")).toBe(0); // Low surrogate
+    });
+
+    test("combining diacritical marks", () => {
+      expect(Bun.stringWidth("\u0300")).toBe(0); // Combining grave
+      expect(Bun.stringWidth("\u0301")).toBe(0); // Combining acute
+      expect(Bun.stringWidth("e\u0301")).toBe(1); // é as e + combining acute
+      expect(Bun.stringWidth("\u036F")).toBe(0); // Combining latin small letter x
+    });
+
+    test("combining diacritical marks extended", () => {
+      expect(Bun.stringWidth("\u1AB0")).toBe(0);
+      expect(Bun.stringWidth("\u1AFF")).toBe(0);
+    });
+
+    test("combining diacritical marks supplement", () => {
+      expect(Bun.stringWidth("\u1DC0")).toBe(0);
+      expect(Bun.stringWidth("\u1DFF")).toBe(0);
+    });
+
+    test("combining diacritical marks for symbols", () => {
+      expect(Bun.stringWidth("\u20D0")).toBe(0);
+      expect(Bun.stringWidth("\u20FF")).toBe(0);
+    });
+
+    test("combining half marks", () => {
+      expect(Bun.stringWidth("\uFE20")).toBe(0);
+      expect(Bun.stringWidth("\uFE2F")).toBe(0);
+    });
+
+    test("control characters", () => {
+      expect(Bun.stringWidth("\x00")).toBe(0);
+      expect(Bun.stringWidth("\x1F")).toBe(0);
+      expect(Bun.stringWidth("\x7F")).toBe(0); // DEL
+      expect(Bun.stringWidth("\x80")).toBe(0); // C1 control start
+      expect(Bun.stringWidth("\x9F")).toBe(0); // C1 control end
+    });
+  });
+
+  describe("CSI sequences (all final bytes)", () => {
+    // CSI final bytes are 0x40-0x7E (@ through ~)
+    test("cursor movement", () => {
+      expect(Bun.stringWidth("a\x1b[5Ab")).toBe(2); // Cursor up
+      expect(Bun.stringWidth("a\x1b[5Bb")).toBe(2); // Cursor down
+      expect(Bun.stringWidth("a\x1b[5Cb")).toBe(2); // Cursor forward
+      expect(Bun.stringWidth("a\x1b[5Db")).toBe(2); // Cursor back
+      expect(Bun.stringWidth("a\x1b[5Eb")).toBe(2); // Cursor next line
+      expect(Bun.stringWidth("a\x1b[5Fb")).toBe(2); // Cursor previous line
+      expect(Bun.stringWidth("a\x1b[5Gb")).toBe(2); // Cursor horizontal absolute
+    });
+
+    test("cursor position", () => {
+      expect(Bun.stringWidth("a\x1b[10;20Hb")).toBe(2); // Cursor position
+      expect(Bun.stringWidth("a\x1b[10;20fb")).toBe(2); // Horizontal vertical position
+    });
+
+    test("erase functions", () => {
+      expect(Bun.stringWidth("a\x1b[Jb")).toBe(2); // Erase in display
+      expect(Bun.stringWidth("a\x1b[0Jb")).toBe(2); // Erase below
+      expect(Bun.stringWidth("a\x1b[1Jb")).toBe(2); // Erase above
+      expect(Bun.stringWidth("a\x1b[2Jb")).toBe(2); // Erase all
+      expect(Bun.stringWidth("a\x1b[Kb")).toBe(2); // Erase in line
+      expect(Bun.stringWidth("a\x1b[0Kb")).toBe(2); // Erase to right
+      expect(Bun.stringWidth("a\x1b[1Kb")).toBe(2); // Erase to left
+      expect(Bun.stringWidth("a\x1b[2Kb")).toBe(2); // Erase entire line
+    });
+
+    test("scroll functions", () => {
+      expect(Bun.stringWidth("a\x1b[5Sb")).toBe(2); // Scroll up
+      expect(Bun.stringWidth("a\x1b[5Tb")).toBe(2); // Scroll down
+    });
+
+    test("SGR (colors)", () => {
+      expect(Bun.stringWidth("a\x1b[mb")).toBe(2); // Reset
+      expect(Bun.stringWidth("a\x1b[0mb")).toBe(2); // Reset
+      expect(Bun.stringWidth("a\x1b[1mb")).toBe(2); // Bold
+      expect(Bun.stringWidth("a\x1b[31mb")).toBe(2); // Red foreground
+      expect(Bun.stringWidth("a\x1b[41mb")).toBe(2); // Red background
+      expect(Bun.stringWidth("a\x1b[38;5;196mb")).toBe(2); // 256-color
+      expect(Bun.stringWidth("a\x1b[38;2;255;0;0mb")).toBe(2); // True color
+    });
+
+    test("other CSI sequences", () => {
+      expect(Bun.stringWidth("a\x1b[?25hb")).toBe(2); // Show cursor
+      expect(Bun.stringWidth("a\x1b[?25lb")).toBe(2); // Hide cursor
+      expect(Bun.stringWidth("a\x1b[sb")).toBe(2); // Save cursor position
+      expect(Bun.stringWidth("a\x1b[ub")).toBe(2); // Restore cursor position
+      expect(Bun.stringWidth("a\x1b[6nb")).toBe(2); // Device status report
+    });
+
+    test("CSI with various final bytes", () => {
+      // Test representative final bytes from 0x40-0x7E
+      expect(Bun.stringWidth("a\x1b[@b")).toBe(2); // @
+      expect(Bun.stringWidth("a\x1b[Lb")).toBe(2); // L - Insert lines
+      expect(Bun.stringWidth("a\x1b[Mb")).toBe(2); // M - Delete lines
+      expect(Bun.stringWidth("a\x1b[Pb")).toBe(2); // P - Delete chars
+      expect(Bun.stringWidth("a\x1b[Xb")).toBe(2); // X - Erase chars
+      expect(Bun.stringWidth("a\x1b[Zb")).toBe(2); // Z - Cursor back tab
+      expect(Bun.stringWidth("a\x1b[`b")).toBe(2); // ` - Character position absolute
+      expect(Bun.stringWidth("a\x1b[ab")).toBe(2); // a - Character position relative
+      expect(Bun.stringWidth("a\x1b[db")).toBe(2); // d - Line position absolute
+      expect(Bun.stringWidth("a\x1b[eb")).toBe(2); // e - Line position relative
+      expect(Bun.stringWidth("a\x1b[rb")).toBe(2); // r - Set scrolling region
+    });
+
+    // Several CSI sequences in one string: each one is skipped independently.
+    test("multiple CSI sequences", () => {
+      // Two SGR sequences before the text and one after it: only "hello" counts.
+      const styled = ["\x1b[31m", "\x1b[1m", "hello", "\x1b[0m"].join("");
+      expect(Bun.stringWidth(styled)).toBe(5);
+
+      // CSI sequences with final bytes A, C and J interleaved with the letters a, b, c.
+      const interleaved = ["a", "\x1b[5A", "\x1b[3C", "b", "\x1b[2J", "c"].join("");
+      expect(Bun.stringWidth(interleaved)).toBe(3);
+    });
+
+    // The width reported for a CSI that is cut off before its final byte is
+    // deliberately left unspecified; measuring it simply must not throw.
+    test("malformed CSI (no final byte)", () => {
+      const truncatedInputs = [
+        "a\x1b[", // introducer only
+        "a\x1b[5", // introducer plus one parameter digit
+      ];
+      for (const truncated of truncatedInputs) {
+        expect(() => Bun.stringWidth(truncated)).not.toThrow();
+      }
+    });
+  });
+
+  // OSC sequences ("ESC ]" ... terminated by BEL or by ST) are invisible:
+  // only the text outside of them contributes to the measured width.
+  describe("OSC sequences", () => {
+    const BEL = "\x07"; // single-byte terminator
+    const ST = "\x1b\\"; // two-byte terminator: ESC followed by a backslash
+
+    test("OSC 8 hyperlinks with BEL terminator", () => {
+      const bareLink = `\x1b]8;;https://example.com${BEL}link\x1b]8;;${BEL}`;
+      expect(Bun.stringWidth(bareLink)).toBe(4);
+
+      // "before" (6) + "click" (5) + "after" (5)
+      const linkInText = `before\x1b]8;;url${BEL}click\x1b]8;;${BEL}after`;
+      expect(Bun.stringWidth(linkInText)).toBe(16);
+    });
+
+    test("OSC 8 hyperlinks with ST terminator", () => {
+      // The backslash that completes ST must NOT be counted as visible
+      const bareLink = `\x1b]8;;https://example.com${ST}link\x1b]8;;${ST}`;
+      expect(Bun.stringWidth(bareLink)).toBe(4);
+
+      // Multiple OSC sequences with ST
+      const titles = `a\x1b]0;title${ST}b\x1b]0;title2${ST}c`;
+      expect(Bun.stringWidth(titles)).toBe(3);
+    });
+
+    test("OSC with various content", () => {
+      // OSC 0 and OSC 2 both set the window title; only "text" is visible
+      for (const command of ["0", "2"]) {
+        expect(Bun.stringWidth(`\x1b]${command};window title${BEL}text`)).toBe(4);
+      }
+    });
+
+    test("unterminated OSC in UTF-16 string", () => {
+      // A non-Latin1 character forces UTF-16; the unterminated OSC that
+      // follows must NOT contribute to the width
+      const samples = [
+        { input: "中\x1b]8;;" + "x".repeat(100), width: 2 }, // Just 中
+        { input: "hello中\x1b]8;;url" + "y".repeat(50), width: 7 }, // hello + 中
+        { input: "🎉\x1b]0;title", width: 2 }, // Just 🎉
+      ];
+      for (const { input, width } of samples) {
+        expect(Bun.stringWidth(input)).toBe(width);
+      }
+    });
+
+    test("mixed OSC and CSI", () => {
+      // SGR colour wrapped around an OSC 8 hyperlink: only "red link" is visible
+      const colouredLink = `\x1b[31m\x1b]8;;url${BEL}red link\x1b]8;;${BEL}\x1b[0m`;
+      expect(Bun.stringWidth(colouredLink)).toBe(8);
+    });
+  });
+
+  // Emoji are measured per grapheme cluster rather than per code point: the
+  // expectations below pin flags, skin-tone modifiers, ZWJ sequences and keycaps
+  // to a total width of 2, and check how VS15 / VS16 change the width of the
+  // character they follow.
+  // NOTE: several literals contain invisible code points (variation selectors,
+  // zero-width joiners) - do not retype them by hand.
+  describe("emoji handling", () => {
+    test("basic emoji", () => {
+      expect(Bun.stringWidth("😀")).toBe(2);
+      expect(Bun.stringWidth("🎉")).toBe(2);
+      expect(Bun.stringWidth("❤️")).toBe(2);
+    });
+
+    // A pair of regional indicators forms one flag of width 2; a lone
+    // indicator is expected to take a single column.
+    test("flag emoji (regional indicators)", () => {
+      expect(Bun.stringWidth("🇺🇸")).toBe(2); // US flag
+      expect(Bun.stringWidth("🇬🇧")).toBe(2); // UK flag
+      expect(Bun.stringWidth("🇯🇵")).toBe(2); // Japan flag
+      expect(Bun.stringWidth("🇦")).toBe(1); // Single regional indicator
+    });
+
+    // A skin-tone modifier must not add width on top of the emoji it modifies.
+    test("skin tone modifiers", () => {
+      expect(Bun.stringWidth("👋")).toBe(2); // Wave without skin tone
+      expect(Bun.stringWidth("👋🏻")).toBe(2); // Light skin tone
+      expect(Bun.stringWidth("👋🏼")).toBe(2); // Medium-light skin tone
+      expect(Bun.stringWidth("👋🏽")).toBe(2); // Medium skin tone
+      expect(Bun.stringWidth("👋🏾")).toBe(2); // Medium-dark skin tone
+      expect(Bun.stringWidth("👋🏿")).toBe(2); // Dark skin tone
+    });
+
+    // Emoji joined by ZWJ are measured as a single 2-column unit.
+    test("ZWJ sequences", () => {
+      expect(Bun.stringWidth("👨‍👩‍👧‍👦")).toBe(2); // Family
+      expect(Bun.stringWidth("👩‍💻")).toBe(2); // Woman technologist
+      expect(Bun.stringWidth("🏳️‍🌈")).toBe(2); // Rainbow flag
+      expect(Bun.stringWidth("👨‍❤️‍👨")).toBe(2); // Couple with heart
+    });
+
+    // Keycap sequences are expected to measure 2 columns as a whole.
+    test("keycap sequences", () => {
+      expect(Bun.stringWidth("1️⃣")).toBe(2); // Keycap 1
+      expect(Bun.stringWidth("2️⃣")).toBe(2); // Keycap 2
+      expect(Bun.stringWidth("#️⃣")).toBe(2); // Keycap #
+      expect(Bun.stringWidth("*️⃣")).toBe(2); // Keycap *
+    });
+
+    test("variation selectors with emoji", () => {
+      // VS16 (emoji presentation)
+      expect(Bun.stringWidth("☀️")).toBe(2); // Sun with VS16
+      expect(Bun.stringWidth("❤️")).toBe(2); // Heart with VS16
+
+      // VS15 (text presentation) - these become narrow
+      expect(Bun.stringWidth("☀\uFE0E")).toBe(1); // Sun with VS15
+      expect(Bun.stringWidth("❤\uFE0E")).toBe(1); // Heart with VS15
+    });
+
+    // VS16 alone must not widen characters that are not shown as emoji.
+    test("variation selectors with non-emoji", () => {
+      // Digits with VS16 (no keycap) stay width 1
+      expect(Bun.stringWidth("0\uFE0F")).toBe(1);
+      expect(Bun.stringWidth("9\uFE0F")).toBe(1);
+      expect(Bun.stringWidth("#\uFE0F")).toBe(1);
+      expect(Bun.stringWidth("*\uFE0F")).toBe(1);
+
+      // Letters with VS16 stay width 1
+      expect(Bun.stringWidth("a\uFE0F")).toBe(1);
+      expect(Bun.stringWidth("A\uFE0F")).toBe(1);
+    });
+
+    test("symbols with variation selectors", () => {
+      // Symbols that become emoji with VS16
+      expect(Bun.stringWidth("©\uFE0F")).toBe(2); // Copyright
+      expect(Bun.stringWidth("®\uFE0F")).toBe(2); // Registered
+      expect(Bun.stringWidth("™\uFE0F")).toBe(2); // Trademark
+      expect(Bun.stringWidth("↩\uFE0F")).toBe(2); // Arrow
+      expect(Bun.stringWidth("ℹ\uFE0F")).toBe(2); // Info
+
+      // Same symbols with VS15 (text) - narrow
+      expect(Bun.stringWidth("©\uFE0E")).toBe(1);
+      expect(Bun.stringWidth("®\uFE0E")).toBe(1);
+    });
+
+    test("emoji in context", () => {
+      expect(Bun.stringWidth("Hello 👋 World")).toBe(14); // "Hello " (6) + emoji (2) + " World" (6)
+      expect(Bun.stringWidth("🏠🏡🏢")).toBe(6); // three emoji, 2 columns each
+    });
+  });
+
+  // East Asian Width: wide and fullwidth characters are expected to take two
+  // columns each, halfwidth forms one column each.
+  describe("East Asian Width", () => {
+    test("CJK characters (wide)", () => {
+      const samples = [
+        { text: "中", columns: 2 },
+        { text: "文", columns: 2 },
+        { text: "中文", columns: 4 },
+        { text: "日本語", columns: 6 },
+        { text: "한글", columns: 4 },
+      ];
+      for (const { text, columns } of samples) {
+        expect(Bun.stringWidth(text)).toBe(columns);
+      }
+    });
+
+    test("fullwidth characters", () => {
+      const fullwidth = [
+        "Ａ", // Fullwidth A
+        "１", // Fullwidth 1
+        "！", // Fullwidth !
+      ];
+      for (const glyph of fullwidth) {
+        expect(Bun.stringWidth(glyph)).toBe(2);
+      }
+    });
+
+    test("halfwidth katakana", () => {
+      const samples = [
+        { text: "ｱ", columns: 1 }, // Halfwidth A
+        { text: "ｶ", columns: 1 }, // Halfwidth KA
+        { text: "ﾊﾞ", columns: 2 }, // Halfwidth HA + voiced mark
+      ];
+      for (const { text, columns } of samples) {
+        expect(Bun.stringWidth(text)).toBe(columns);
+      }
+    });
+
+    test("mixed width", () => {
+      const samples = [
+        { text: "hello世界", columns: 9 }, // 5 + 4
+        { text: "abc中文def", columns: 10 }, // 3 + 4 + 3
+      ];
+      for (const { text, columns } of samples) {
+        expect(Bun.stringWidth(text)).toBe(columns);
+      }
+    });
+  });
+
+  // Combining marks in Devanagari and Thai must not add width on top of the
+  // base letter they attach to.
+  // NOTE: the literals below contain combining code points that render on the
+  // preceding letter - copy them, do not retype them.
+  describe("Indic scripts", () => {
+    test("Devanagari with combining marks", () => {
+      expect(Bun.stringWidth("क")).toBe(1); // Ka
+      expect(Bun.stringWidth("क्")).toBe(1); // Ka + virama (combining)
+      expect(Bun.stringWidth("कि")).toBe(1); // Ka + vowel sign i (combining)
+    });
+
+    test("Thai with combining marks", () => {
+      expect(Bun.stringWidth("ก")).toBe(1); // Ko kai
+      expect(Bun.stringWidth("ก็")).toBe(1); // With maitaikhu
+      expect(Bun.stringWidth("ปฏัก")).toBe(3); // ป + ฏ + ั (combining) + ก = 3 visible
+    });
+  });
+
+  // Regression guards for two kinds of mistakes: counting non-ASCII characters
+  // that sit inside an (invisible) OSC sequence, and treating visible letters
+  // or symbols of Indic scripts as if they were zero-width combining marks.
+  describe("non-ASCII in escape sequences and Indic script handling", () => {
+    test("OSC with non-ASCII (emoji) in URL should be invisible", () => {
+      // Non-ASCII characters inside OSC sequence should NOT be counted
+      // The emoji is part of the invisible hyperlink URL
+      const result = Bun.stringWidth("a\x1b]8;;https://🎉\x07b");
+      expect(result).toBe(2); // just "ab"
+    });
+
+    test("OSC with CJK in URL should be invisible", () => {
+      // CJK character inside OSC sequence should NOT be counted
+      const result = Bun.stringWidth("a\x1b]8;;https://中.com\x07b");
+      expect(result).toBe(2); // just "ab"
+    });
+
+    test("Indic Avagraha (U+093D) should have width 1", () => {
+      // U+093D (ऽ) is Devanagari Avagraha - a visible letter (category Lo)
+      // Regression guard: it must NOT be reported as zero-width
+      // (presumably an earlier range-based Indic heuristic did so - confirm against the implementation)
+      expect(Bun.stringWidth("\u093D")).toBe(1);
+      expect(Bun.stringWidth("a\u093Db")).toBe(3);
+    });
+
+    test("Malayalam Sign Para (U+0D4F) should have width 1", () => {
+      // U+0D4F (൏) is Malayalam Sign Para - a visible symbol (category So)
+      // Regression guard: it must NOT be reported as zero-width
+      // (presumably an earlier range-based Indic heuristic did so - confirm against the implementation)
+      expect(Bun.stringWidth("\u0D4F")).toBe(1);
+    });
+
+    test("Bengali Avagraha (U+09BD) should have width 1", () => {
+      // U+09BD (ঽ) is Bengali Avagraha - a visible letter (category Lo)
+      expect(Bun.stringWidth("\u09BD")).toBe(1);
+    });
+
+    test("Tamil Visarga (U+0B83) should have width 1", () => {
+      // U+0B83 (ஃ) is Tamil Sign Visarga - a visible letter (category Lo)
+      expect(Bun.stringWidth("\u0B83")).toBe(1);
+    });
+  });
+
+  // Boundary inputs: nothing to measure, nothing visible, very long input,
+  // and a mix of every category in one string.
+  describe("edge cases", () => {
+    test("empty string", () => {
+      const nothing = "";
+      expect(Bun.stringWidth(nothing)).toBe(0);
+    });
+
+    test("only zero-width characters", () => {
+      for (const invisible of ["\u200B\u200C\u200D", "\uFEFF\u2060"]) {
+        expect(Bun.stringWidth(invisible)).toBe(0);
+      }
+    });
+
+    test("only ANSI sequences", () => {
+      for (const controlOnly of ["\x1b[31m\x1b[0m", "\x1b[5A\x1b[3B"]) {
+        expect(Bun.stringWidth(controlOnly)).toBe(0);
+      }
+    });
+
+    test("very long strings", () => {
+      // 10000 single-column letters
+      expect(Bun.stringWidth("a".repeat(10000))).toBe(10000);
+
+      // 1000 emoji at two columns each
+      expect(Bun.stringWidth("😀".repeat(1000))).toBe(2000);
+    });
+
+    test("mixed content", () => {
+      const mixed = "Hello" + "\x1b[31m" + "世界" + "\x1b[0m" + "👋";
+      expect(Bun.stringWidth(mixed)).toBe(11); // 5 + 4 + 2
+    });
+
+    test("bare ESC followed by non-sequence", () => {
+      // ESC + X is not a valid sequence, so "a", "X" and "b" are all counted
+      const strayEscape = "a\x1bXb";
+      expect(Bun.stringWidth(strayEscape)).toBe(3);
+    });
+  });
+
+  // Large, adversarial and malformed inputs. These tests mainly guard against
+  // hangs, crashes and state-machine confusion; where the exact width is well
+  // defined it is asserted, otherwise only a loose bound or "does not throw"
+  // is checked.
+  describe("fuzzer-like stress tests", () => {
+    test("many ESC characters without valid sequences", () => {
+      // Many bare ESC characters - should not hang
+      const input = "\x1b".repeat(10000);
+      // Each ESC is a control character with width 0
+      expect(Bun.stringWidth(input)).toBe(0);
+    });
+
+    test("CSI without final byte (unterminated)", () => {
+      // CSI with a very long parameter run. Note that the trailing 'b' (0x62)
+      // lies in the final-byte range 0x40-0x7E, so this input is presumably
+      // parsed as one complete CSI and only 'a' stays visible. The bound is
+      // kept loose on purpose; tighten it once confirmed against the implementation.
+      const input = "a\x1b[" + "9".repeat(10000) + "b";
+      expect(Bun.stringWidth(input)).toBeGreaterThanOrEqual(1);
+
+      // Genuinely unterminated variant: the parameters run to the end of the
+      // string. Its width is implementation-defined; it just must not throw.
+      const unterminated = "a\x1b[" + "9".repeat(10000);
+      expect(() => Bun.stringWidth(unterminated)).not.toThrow();
+    });
+
+    test("OSC without terminator (unterminated)", () => {
+      // OSC sequence that never terminates
+      const input = "a\x1b]8;;" + "x".repeat(10000);
+      // Should consume the OSC, leaving just 'a'
+      expect(Bun.stringWidth(input)).toBe(1);
+    });
+
+    test("many incomplete CSI sequences", () => {
+      // Pattern: ESC [ digit ESC [ digit...
+      // '[' (0x5B) is a valid CSI final byte per ECMA-48 (range 0x40-0x7E)
+      // So ESC [ 1 ESC [ is a complete CSI ending with '[', leaving some digits visible
+      // The pattern alternates between 1 and 2 visible chars, averaging 1.5 per pattern
+      const input = "\x1b[1\x1b[2\x1b[3".repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(1500);
+    });
+
+    test("alternating ESC and bracket", () => {
+      // ESC [ ESC [ pattern - could confuse state machine
+      const input = "\x1b[\x1b[".repeat(5000);
+      expect(Bun.stringWidth(input)).toBe(0);
+    });
+
+    test("ESC ESC starts new sequence correctly", () => {
+      // ESC ESC ] should parse as: first ESC ignored, second ESC + ] = OSC start
+      expect(Bun.stringWidth("\x1b\x1b]8;;url\x07link\x1b]8;;\x07")).toBe(4); // "link"
+      expect(Bun.stringWidth("\x1b\x1b[31mred\x1b[0m")).toBe(3); // "red"
+      expect(Bun.stringWidth("\x1b\x1b\x1b[31mred")).toBe(3); // ESC ESC ESC [ = CSI
+      expect(Bun.stringWidth("a\x1b\x1bb")).toBe(2); // ESC ESC followed by regular char
+    });
+
+    test("deeply nested combining marks", () => {
+      // Base character with many combining marks (zalgo-like)
+      const input = "a" + "\u0300\u0301\u0302\u0303\u0304".repeat(2000);
+      expect(Bun.stringWidth(input)).toBe(1); // All combining marks are zero-width
+    });
+
+    test("many ZWJ characters in sequence", () => {
+      // Many ZWJ without proper emoji structure
+      const input = "👨" + "\u200D".repeat(10000);
+      expect(Bun.stringWidth(input)).toBe(2); // Just the base emoji
+    });
+
+    test("many variation selectors", () => {
+      // Character followed by many variation selectors
+      const input = "A" + "\uFE0F".repeat(10000);
+      expect(Bun.stringWidth(input)).toBe(1);
+    });
+
+    test("alternating surrogates (invalid pairs)", () => {
+      // High-high-high pattern (invalid UTF-16)
+      const input = "\uD800\uD800\uD800".repeat(3000);
+      expect(Bun.stringWidth(input)).toBe(0); // Lone surrogates are zero-width
+    });
+
+    test("low surrogate without high (invalid)", () => {
+      const input = "\uDC00".repeat(10000);
+      expect(Bun.stringWidth(input)).toBe(0);
+    });
+
+    test("many regional indicators (odd count)", () => {
+      // The pattern holds 11 regional indicators. It must be repeated an ODD
+      // number of times for the total to be odd (11 * 501 = 5511); an even
+      // repeat count would pair every indicator and never leave one dangling.
+      const input = "🇦🇧🇨🇩🇪🇫🇬🇭🇮🇯🇰".repeat(501);
+      // Should handle gracefully
+      expect(Bun.stringWidth(input)).toBeGreaterThan(0);
+    });
+
+    test("maximum codepoint values", () => {
+      // Characters near U+10FFFF
+      const input = "\u{10FFFF}\u{10FFFE}\u{10FFFD}".repeat(1000);
+      // Smoke test only: the bound below holds for any width, so this merely
+      // verifies that measuring completes without throwing.
+      expect(Bun.stringWidth(input)).toBeGreaterThanOrEqual(0);
+    });
+
+    test("rapid encoding switches", () => {
+      // Mix of ASCII, Latin-1, BMP, and astral
+      const pattern = "a\x80\u0100\u1000\u{10000}";
+      const input = pattern.repeat(2000);
+      expect(Bun.stringWidth(input)).toBeGreaterThan(0);
+    });
+
+    test("all CSI final bytes", () => {
+      // Test every possible CSI final byte (0x40-0x7E)
+      let input = "";
+      for (let i = 0x40; i <= 0x7e; i++) {
+        input += `a\x1b[1${String.fromCharCode(i)}`;
+      }
+      input = input.repeat(100);
+      // 63 different final bytes * 'a' = 63 * 100
+      expect(Bun.stringWidth(input)).toBe(6300);
+    });
+
+    test("OSC with embedded ESC characters", () => {
+      // OSC containing ESC that isn't ST
+      const input = "a\x1b]8;;\x1bx\x1by\x1bz\x07b";
+      expect(Bun.stringWidth(input)).toBe(2); // 'a' and 'b'
+    });
+
+    test("interleaved ANSI and emoji", () => {
+      const input = "\x1b[31m👨‍👩‍👧\x1b[0m\x1b[32m🇺🇸\x1b[0m".repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(4000); // 2 + 2 per iteration
+    });
+
+    test("string of only zero-width characters", () => {
+      // Many different zero-width characters
+      const zeroWidth = "\u200B\u200C\u200D\u200E\u200F\uFEFF\u2060\u2061\u2062\u2063\u2064";
+      const input = zeroWidth.repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(0);
+    });
+
+    test("pathological grapheme cluster", () => {
+      // Emoji + skin tone modifier + ZWJ + second emoji, measured as one
+      // 2-column cluster per repetition
+      const complex = "👩🏻‍🔬";
+      const input = complex.repeat(2000);
+      expect(Bun.stringWidth(input)).toBe(4000);
+    });
+
+    test("mixed valid and invalid escape sequences", () => {
+      // Pattern: valid CSI + ESC ESC + OSC + incomplete CSI
+      // - \x1b[31m: valid CSI, consumed
+      // - \x1b\x1b]: second ESC correctly starts new sequence, ] starts OSC
+      // - 0;title\x07: consumed by OSC, BEL terminates it
+      // - \x1b[: incomplete CSI that continues into next pattern
+      // At pattern boundaries, incomplete CSI (\x1b[) + next pattern's \x1b[31m:
+      // The [ in \x1b[31m is a valid CSI final byte (0x5B), so "31m" becomes visible (3 chars)
+      // 999 boundaries * 3 chars = 2997
+      const input = "\x1b[31m\x1b\x1b]0;title\x07\x1b[".repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(2997);
+    });
+
+    test("random-like byte patterns", () => {
+      // Deterministic scramble of the ASCII range: 7 is coprime to 128, so the
+      // formula keeps cycling through all 128 code units in a shuffled order
+      let input = "";
+      for (let i = 0; i < 10000; i++) {
+        const code = (i * 7 + 13) % 128; // Always in 0..127 (ASCII)
+        input += String.fromCharCode(code);
+      }
+      expect(() => Bun.stringWidth(input)).not.toThrow();
+    });
+
+    test("BOM at various positions", () => {
+      // BOM scattered throughout string
+      const input = "hello\uFEFFworld\uFEFFtest\uFEFF".repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(14000); // "helloworldtest" = 14 * 1000
+    });
+
+    test("soft hyphen stress test", () => {
+      // Many soft hyphens
+      const input = "a\u00ADb\u00ADc\u00AD".repeat(3000);
+      expect(Bun.stringWidth(input)).toBe(9000); // 3 visible chars per iteration
+    });
+
+    test("Arabic formatting characters", () => {
+      // Arabic text with formatting characters
+      const input = "\u0600\u0601\u0602\u0603\u0604\u0605text".repeat(1000);
+      expect(Bun.stringWidth(input)).toBe(4000); // Only "text" is visible
+    });
+
+    test("tag characters (U+E0000 range)", () => {
+      // Many tag characters
+      const input = "\u{E0001}\u{E0020}\u{E0041}\u{E007F}".repeat(2500);
+      expect(Bun.stringWidth(input)).toBe(0);
+    });
+
+    test("variation selector supplement (U+E0100 range)", () => {
+      // Many variation selectors from supplement
+      const input = "字\u{E0100}".repeat(5000);
+      expect(Bun.stringWidth(input)).toBe(10000); // Each 字 is width 2
+    });
+
+    test("extremely long single grapheme", () => {
+      // One base + tons of combining marks = 1 grapheme
+      let input = "o";
+      for (let i = 0; i < 1000; i++) {
+        // Cycles through the 112 code points U+0300..U+036F
+        input += String.fromCharCode(0x0300 + (i % 112)); // Various combining marks
+      }
+      expect(Bun.stringWidth(input)).toBe(1);
+    });
+
+    test("null bytes interspersed", () => {
+      const input = "a\x00b\x00c\x00".repeat(3000);
+      expect(Bun.stringWidth(input)).toBe(9000); // NUL is zero-width
+    });
+
+    test("DEL characters (0x7F)", () => {
+      const input = "a\x7Fb\x7Fc".repeat(3000);
+      expect(Bun.stringWidth(input)).toBe(9000); // DEL is zero-width: 3 letters per iteration
+    });
+
+    test("C1 control characters", () => {
+      // C1 controls: 0x80-0x9F
+      let input = "";
+      for (let i = 0x80; i <= 0x9f; i++) {
+        input += "a" + String.fromCharCode(i);
+      }
+      input = input.repeat(300);
+      expect(Bun.stringWidth(input)).toBe(9600); // 32 'a' chars per pattern * 300
+    });
+
+    test("worst case: every character needs special handling", () => {
+      // Mix that exercises every code path
+      const input = "a\x1b[31m中\u0300\uFE0F👨‍👩‍👧🇺🇸\x1b]8;;url\x07link\x1b]8;;\x07\u200B\x1b[0m".repeat(500);
+      expect(Bun.stringWidth(input)).toBeGreaterThan(0);
+    });
+  });
+});