/// Returns true when `cp` is treated as occupying zero terminal columns.
///
/// Covered: C0 controls (<= 0x1F), DEL and C1 controls (0x7F..0x9F) and, for
/// element types wider than one byte, the combining / invisible ranges listed
/// below. For one-byte `T` only the control ranges are checked.
pub fn isZeroWidthCodepointType(comptime T: type, cp: T) bool {
    if (cp <= 0x1f) {
        return true;
    }

    if (cp >= 0x7f and cp <= 0x9f) {
        // C1 control characters
        return true;
    }

    // A one-byte element cannot hold any of the ranges below.
    if (comptime @sizeOf(T) == 1) {
        return false;
    }

    if (cp >= 0x300 and cp <= 0x36f) {
        // Combining Diacritical Marks
        return true;
    }

    if (cp >= 0x200b and cp <= 0x200f) {
        // Modifying Invisible Characters
        return true;
    }

    if (cp >= 0x20d0 and cp <= 0x20ff)
        // Combining Diacritical Marks for Symbols
        return true;

    if (cp >= 0xfe00 and cp <= 0xfe0f)
        // Variation Selectors
        return true;

    if (cp >= 0xfe20 and cp <= 0xfe2f)
        // Combining Half Marks
        return true;

    if (cp == 0xfeff)
        // Zero Width No-Break Space (BOM, ZWNBSP)
        return true;

    if (cp >= 0xe0100 and cp <= 0xe01ef)
        // Variation Selectors
        return true;

    return false;
}

/// Returns true when `cp` is East Asian Wide (`W`) or Fullwidth (`F`), i.e. it
/// is expected to occupy two terminal columns.
///
/// Official unicode reference: https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
/// Tag legend:
///  - `W` (wide) -> true
///  - `F` (full-width) -> true
///  - `H` (half-width) -> false
///  - `N` (neutral) -> false
///  - `Na` (narrow) -> false
///  - `A` (ambiguous) -> false?
///
/// To regenerate the switch body list, run:
/// ```js
/// [...(await (await fetch("https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt")).text()).matchAll(/^([\dA-F]{4,})(?:\.\.([\dA-F]{4,}))?\s+;\s+(\w+)\s+#\s+(.*?)\s*$/gm)].flatMap(([,start, end, type, comment]) => (
///    (['W', 'F'].includes(type)) ? [`        ${(end ? `0x${start}...0x${end}` : `0x${start}`)}, // ${''.padStart(17 - start.length - (end ? end.length + 5 : 0))}[${type}] ${comment}`] : []
/// )).join('\n')
/// ```
pub fn isFullWidthCodepointType(comptime T: type, cp: T) bool {
    // Fast reject: nothing below U+1100 is in the table.
    if (!(cp >= 0x1100)) {
        return false;
    }

    return switch (cp) {
        0x1100...0x115F, // [W] Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
        0x231A...0x231B, // [W] So [2] WATCH..HOURGLASS
        0x2329, // [W] Ps LEFT-POINTING ANGLE BRACKET
        0x232A, // [W] Pe RIGHT-POINTING ANGLE BRACKET
        0x23E9...0x23EC, // [W] So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE
        0x23F0, // [W] So ALARM CLOCK
        0x23F3, // [W] So HOURGLASS WITH FLOWING SAND
        0x25FD...0x25FE, // [W] Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE
        0x2614...0x2615, // [W] So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE
        0x2648...0x2653, // [W] So [12] ARIES..PISCES
        0x267F, // [W] So WHEELCHAIR SYMBOL
        0x2693, // [W] So ANCHOR
        0x26A1, // [W] So HIGH VOLTAGE SIGN
        0x26AA...0x26AB, // [W] So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE
        0x26BD...0x26BE, // [W] So [2] SOCCER BALL..BASEBALL
        0x26C4...0x26C5, // [W] So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD
        0x26CE, // [W] So OPHIUCHUS
        0x26D4, // [W] So NO ENTRY
        0x26EA, // [W] So CHURCH
        0x26F2...0x26F3, // [W] So [2] FOUNTAIN..FLAG IN HOLE
        0x26F5, // [W] So SAILBOAT
        0x26FA, // [W] So TENT
        0x26FD, // [W] So FUEL PUMP
        0x2705, // [W] So WHITE HEAVY CHECK MARK
        0x270A...0x270B, // [W] So [2] RAISED FIST..RAISED HAND
        0x2728, // [W] So SPARKLES
        0x274C, // [W] So CROSS MARK
        0x274E, // [W] So NEGATIVE SQUARED CROSS MARK
        0x2753...0x2755, // [W] So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT
        0x2757, // [W] So HEAVY EXCLAMATION MARK SYMBOL
        0x2795...0x2797, // [W] So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN
        0x27B0, // [W] So CURLY LOOP
        0x27BF, // [W] So DOUBLE CURLY LOOP
        0x2B1B...0x2B1C, // [W] So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE
        0x2B50, // [W] So WHITE MEDIUM STAR
        0x2B55, // [W] So HEAVY LARGE CIRCLE
        0x2E80...0x2E99, // [W] So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
        0x2E9B...0x2EF3, // [W] So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
        0x2F00...0x2FD5, // [W] So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
        0x2FF0...0x2FFF, // [W] So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
        0x3000, // [F] Zs IDEOGRAPHIC SPACE
        0x3001...0x3003, // [W] Po [3] IDEOGRAPHIC COMMA..DITTO MARK
        0x3004, // [W] So JAPANESE INDUSTRIAL STANDARD SYMBOL
        0x3005, // [W] Lm IDEOGRAPHIC ITERATION MARK
        0x3006, // [W] Lo IDEOGRAPHIC CLOSING MARK
        0x3007, // [W] Nl IDEOGRAPHIC NUMBER ZERO
        0x3008, // [W] Ps LEFT ANGLE BRACKET
        0x3009, // [W] Pe RIGHT ANGLE BRACKET
        0x300A, // [W] Ps LEFT DOUBLE ANGLE BRACKET
        0x300B, // [W] Pe RIGHT DOUBLE ANGLE BRACKET
        0x300C, // [W] Ps LEFT CORNER BRACKET
        0x300D, // [W] Pe RIGHT CORNER BRACKET
        0x300E, // [W] Ps LEFT WHITE CORNER BRACKET
        0x300F, // [W] Pe RIGHT WHITE CORNER BRACKET
        0x3010, // [W] Ps LEFT BLACK LENTICULAR BRACKET
        0x3011, // [W] Pe RIGHT BLACK LENTICULAR BRACKET
        0x3012...0x3013, // [W] So [2] POSTAL MARK..GETA MARK
        0x3014, // [W] Ps LEFT TORTOISE SHELL BRACKET
        0x3015, // [W] Pe RIGHT TORTOISE SHELL BRACKET
        0x3016, // [W] Ps LEFT WHITE LENTICULAR BRACKET
        0x3017, // [W] Pe RIGHT WHITE LENTICULAR BRACKET
        0x3018, // [W] Ps LEFT WHITE TORTOISE SHELL BRACKET
        0x3019, // [W] Pe RIGHT WHITE TORTOISE SHELL BRACKET
        0x301A, // [W] Ps LEFT WHITE SQUARE BRACKET
        0x301B, // [W] Pe RIGHT WHITE SQUARE BRACKET
        0x301C, // [W] Pd WAVE DASH
        0x301D, // [W] Ps REVERSED DOUBLE PRIME QUOTATION MARK
        0x301E...0x301F, // [W] Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
        0x3020, // [W] So POSTAL MARK FACE
        0x3021...0x3029, // [W] Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
        0x302A...0x302D, // [W] Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
        0x302E...0x302F, // [W] Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
        0x3030, // [W] Pd WAVY DASH
        0x3031...0x3035, // [W] Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
        0x3036...0x3037, // [W] So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
        0x3038...0x303A, // [W] Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
        0x303B, // [W] Lm VERTICAL IDEOGRAPHIC ITERATION MARK
        0x303C, // [W] Lo MASU MARK
        0x303D, // [W] Po PART ALTERNATION MARK
        0x303E, // [W] So IDEOGRAPHIC VARIATION INDICATOR
        0x3041...0x3096, // [W] Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
        0x3099...0x309A, // [W] Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
        0x309B...0x309C, // [W] Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
        0x309D...0x309E, // [W] Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
        0x309F, // [W] Lo HIRAGANA DIGRAPH YORI
        0x30A0, // [W] Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
        0x30A1...0x30FA, // [W] Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
        0x30FB, // [W] Po KATAKANA MIDDLE DOT
        0x30FC...0x30FE, // [W] Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
        0x30FF, // [W] Lo KATAKANA DIGRAPH KOTO
        0x3105...0x312F, // [W] Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
        0x3131...0x318E, // [W] Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
        0x3190...0x3191, // [W] So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
        0x3192...0x3195, // [W] No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
        0x3196...0x319F, // [W] So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
        0x31A0...0x31BF, // [W] Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
        0x31C0...0x31E3, // [W] So [36] CJK STROKE T..CJK STROKE Q
        0x31EF, // [W] So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
        0x31F0...0x31FF, // [W] Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
        0x3200...0x321E, // [W] So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
        0x3220...0x3229, // [W] No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
        0x322A...0x3247, // [W] So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
        0x3250, // [W] So PARTNERSHIP SIGN
        0x3251...0x325F, // [W] No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
        0x3260...0x327F, // [W] So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL
        0x3280...0x3289, // [W] No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
        0x328A...0x32B0, // [W] So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
        0x32B1...0x32BF, // [W] No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
        0x32C0...0x32FF, // [W] So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA
        0x3300...0x33FF, // [W] So [256] SQUARE APAATO..SQUARE GAL
        0x3400...0x4DBF, // [W] Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
        0x4E00...0x9FFF, // [W] Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
        0xA000...0xA014, // [W] Lo [21] YI SYLLABLE IT..YI SYLLABLE E
        0xA015, // [W] Lm YI SYLLABLE WU
        0xA016...0xA48C, // [W] Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
        0xA490...0xA4C6, // [W] So [55] YI RADICAL QOT..YI RADICAL KE
        0xA960...0xA97C, // [W] Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
        0xAC00...0xD7A3, // [W] Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
        0xF900...0xFA6D, // [W] Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
        0xFA6E...0xFA6F, // [W] Cn [2] reserved-FA6E..reserved-FA6F
        0xFA70...0xFAD9, // [W] Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
        0xFADA...0xFAFF, // [W] Cn [38] reserved-FADA..reserved-FAFF
        0xFE10...0xFE16, // [W] Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
        0xFE17, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
        0xFE18, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
        0xFE19, // [W] Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
        0xFE30, // [W] Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
        0xFE31...0xFE32, // [W] Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
        0xFE33...0xFE34, // [W] Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
        0xFE35, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
        0xFE36, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
        0xFE37, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
        0xFE38, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
        0xFE39, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
        0xFE3A, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
        0xFE3B, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
        0xFE3C, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
        0xFE3D, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
        0xFE3E, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
        0xFE3F, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
        0xFE40, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
        0xFE41, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
        0xFE42, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
        0xFE43, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
        0xFE44, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
        0xFE45...0xFE46, // [W] Po [2] SESAME DOT..WHITE SESAME DOT
        0xFE47, // [W] Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
        0xFE48, // [W] Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
        0xFE49...0xFE4C, // [W] Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
        0xFE4D...0xFE4F, // [W] Pc [3] DASHED LOW LINE..WAVY LOW LINE
        0xFE50...0xFE52, // [W] Po [3] SMALL COMMA..SMALL FULL STOP
        0xFE54...0xFE57, // [W] Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
        0xFE58, // [W] Pd SMALL EM DASH
        0xFE59, // [W] Ps SMALL LEFT PARENTHESIS
        0xFE5A, // [W] Pe SMALL RIGHT PARENTHESIS
        0xFE5B, // [W] Ps SMALL LEFT CURLY BRACKET
        0xFE5C, // [W] Pe SMALL RIGHT CURLY BRACKET
        0xFE5D, // [W] Ps SMALL LEFT TORTOISE SHELL BRACKET
        0xFE5E, // [W] Pe SMALL RIGHT TORTOISE SHELL BRACKET
        0xFE5F...0xFE61, // [W] Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
        0xFE62, // [W] Sm SMALL PLUS SIGN
        0xFE63, // [W] Pd SMALL HYPHEN-MINUS
        0xFE64...0xFE66, // [W] Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
        0xFE68, // [W] Po SMALL REVERSE SOLIDUS
        0xFE69, // [W] Sc SMALL DOLLAR SIGN
        0xFE6A...0xFE6B, // [W] Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
        0xFF01...0xFF03, // [F] Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
        0xFF04, // [F] Sc FULLWIDTH DOLLAR SIGN
        0xFF05...0xFF07, // [F] Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
        0xFF08, // [F] Ps FULLWIDTH LEFT PARENTHESIS
        0xFF09, // [F] Pe FULLWIDTH RIGHT PARENTHESIS
        0xFF0A, // [F] Po FULLWIDTH ASTERISK
        0xFF0B, // [F] Sm FULLWIDTH PLUS SIGN
        0xFF0C, // [F] Po FULLWIDTH COMMA
        0xFF0D, // [F] Pd FULLWIDTH HYPHEN-MINUS
        0xFF0E...0xFF0F, // [F] Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
        0xFF10...0xFF19, // [F] Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
        0xFF1A...0xFF1B, // [F] Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
        0xFF1C...0xFF1E, // [F] Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
        0xFF1F...0xFF20, // [F] Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
        0xFF21...0xFF3A, // [F] Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
        0xFF3B, // [F] Ps FULLWIDTH LEFT SQUARE BRACKET
        0xFF3C, // [F] Po FULLWIDTH REVERSE SOLIDUS
        0xFF3D, // [F] Pe FULLWIDTH RIGHT SQUARE BRACKET
        0xFF3E, // [F] Sk FULLWIDTH CIRCUMFLEX ACCENT
        0xFF3F, // [F] Pc FULLWIDTH LOW LINE
        0xFF40, // [F] Sk FULLWIDTH GRAVE ACCENT
        0xFF41...0xFF5A, // [F] Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
        0xFF5B, // [F] Ps FULLWIDTH LEFT CURLY BRACKET
        0xFF5C, // [F] Sm FULLWIDTH VERTICAL LINE
        0xFF5D, // [F] Pe FULLWIDTH RIGHT CURLY BRACKET
        0xFF5E, // [F] Sm FULLWIDTH TILDE
        0xFF5F, // [F] Ps FULLWIDTH LEFT WHITE PARENTHESIS
        0xFF60, // [F] Pe FULLWIDTH RIGHT WHITE PARENTHESIS
        0xFFE0...0xFFE1, // [F] Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
        0xFFE2, // [F] Sm FULLWIDTH NOT SIGN
        0xFFE3, // [F] Sk FULLWIDTH MACRON
        0xFFE4, // [F] So FULLWIDTH BROKEN BAR
        0xFFE5...0xFFE6, // [F] Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
        0x16FE0...0x16FE1, // [W] Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
        0x16FE2, // [W] Po OLD CHINESE HOOK MARK
        0x16FE3, // [W] Lm OLD CHINESE ITERATION MARK
        0x16FE4, // [W] Mn KHITAN SMALL SCRIPT FILLER
        0x16FF0...0x16FF1, // [W] Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
        0x17000...0x187F7, // [W] Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
        0x18800...0x18AFF, // [W] Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
        0x18B00...0x18CD5, // [W] Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
        0x18D00...0x18D08, // [W] Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
        0x1AFF0...0x1AFF3, // [W] Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
        0x1AFF5...0x1AFFB, // [W] Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
        0x1AFFD...0x1AFFE, // [W] Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
        0x1B000...0x1B0FF, // [W] Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
        0x1B100...0x1B122, // [W] Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
        0x1B132, // [W] Lo HIRAGANA LETTER SMALL KO
        0x1B150...0x1B152, // [W] Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
        0x1B155, // [W] Lo KATAKANA LETTER SMALL KO
        0x1B164...0x1B167, // [W] Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
        0x1B170...0x1B2FB, // [W] Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
        0x1F004, // [W] So MAHJONG TILE RED DRAGON
        0x1F0CF, // [W] So PLAYING CARD BLACK JOKER
        0x1F18E, // [W] So NEGATIVE SQUARED AB
        0x1F191...0x1F19A, // [W] So [10] SQUARED CL..SQUARED VS
        0x1F200...0x1F202, // [W] So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
        0x1F210...0x1F23B, // [W] So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
        0x1F240...0x1F248, // [W] So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
        0x1F250...0x1F251, // [W] So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
        0x1F260...0x1F265, // [W] So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
        0x1F300...0x1F320, // [W] So [33] CYCLONE..SHOOTING STAR
        0x1F32D...0x1F335, // [W] So [9] HOT DOG..CACTUS
        0x1F337...0x1F37C, // [W] So [70] TULIP..BABY BOTTLE
        0x1F37E...0x1F393, // [W] So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP
        0x1F3A0...0x1F3CA, // [W] So [43] CAROUSEL HORSE..SWIMMER
        0x1F3CF...0x1F3D3, // [W] So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL
        0x1F3E0...0x1F3F0, // [W] So [17] HOUSE BUILDING..EUROPEAN CASTLE
        0x1F3F4, // [W] So WAVING BLACK FLAG
        0x1F3F8...0x1F3FA, // [W] So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA
        0x1F3FB...0x1F3FF, // [W] Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
        0x1F400...0x1F43E, // [W] So [63] RAT..PAW PRINTS
        0x1F440, // [W] So EYES
        0x1F442...0x1F4FC, // [W] So [187] EAR..VIDEOCASSETTE
        0x1F4FF...0x1F53D, // [W] So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE
        0x1F54B...0x1F54E, // [W] So [4] KAABA..MENORAH WITH NINE BRANCHES
        0x1F550...0x1F567, // [W] So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
        0x1F57A, // [W] So MAN DANCING
        0x1F595...0x1F596, // [W] So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
        0x1F5A4, // [W] So BLACK HEART
        0x1F5FB...0x1F5FF, // [W] So [5] MOUNT FUJI..MOYAI
        0x1F600...0x1F64F, // [W] So [80] GRINNING FACE..PERSON WITH FOLDED HANDS
        0x1F680...0x1F6C5, // [W] So [70] ROCKET..LEFT LUGGAGE
        0x1F6CC, // [W] So SLEEPING ACCOMMODATION
        0x1F6D0...0x1F6D2, // [W] So [3] PLACE OF WORSHIP..SHOPPING TROLLEY
        0x1F6D5...0x1F6D7, // [W] So [3] HINDU TEMPLE..ELEVATOR
        0x1F6DC...0x1F6DF, // [W] So [4] WIRELESS..RING BUOY
        0x1F6EB...0x1F6EC, // [W] So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING
        0x1F6F4...0x1F6FC, // [W] So [9] SCOOTER..ROLLER SKATE
        0x1F7E0...0x1F7EB, // [W] So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
        0x1F7F0, // [W] So HEAVY EQUALS SIGN
        0x1F90C...0x1F93A, // [W] So [47] PINCHED FINGERS..FENCER
        0x1F93C...0x1F945, // [W] So [10] WRESTLERS..GOAL NET
        0x1F947...0x1F9FF, // [W] So [185] FIRST PLACE MEDAL..NAZAR AMULET
        0x1FA70...0x1FA7C, // [W] So [13] BALLET SHOES..CRUTCH
        0x1FA80...0x1FA88, // [W] So [9] YO-YO..FLUTE
        0x1FA90...0x1FABD, // [W] So [46] RINGED PLANET..WING
        0x1FABF...0x1FAC5, // [W] So [7] GOOSE..PERSON WITH CROWN
        0x1FACE...0x1FADB, // [W] So [14] MOOSE..PEA POD
        0x1FAE0...0x1FAE8, // [W] So [9] MELTING FACE..SHAKING FACE
        0x1FAF0...0x1FAF8, // [W] So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
        0x20000...0x2A6DF, // [W] Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
        0x2A6E0...0x2A6FF, // [W] Cn [32] reserved-2A6E0..reserved-2A6FF
        0x2A700...0x2B739, // [W] Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
        0x2B73A...0x2B73F, // [W] Cn [6] reserved-2B73A..reserved-2B73F
        0x2B740...0x2B81D, // [W] Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
        0x2B81E...0x2B81F, // [W] Cn [2] reserved-2B81E..reserved-2B81F
        0x2B820...0x2CEA1, // [W] Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
        0x2CEA2...0x2CEAF, // [W] Cn [14] reserved-2CEA2..reserved-2CEAF
        0x2CEB0...0x2EBE0, // [W] Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
        0x2EBE1...0x2EBEF, // [W] Cn [15] reserved-2EBE1..reserved-2EBEF
        0x2EBF0...0x2EE5D, // [W] Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
        0x2EE5E...0x2F7FF, // [W] Cn [2466] reserved-2EE5E..reserved-2F7FF
        0x2F800...0x2FA1D, // [W] Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
        0x2FA1E...0x2FA1F, // [W] Cn [2] reserved-2FA1E..reserved-2FA1F
        0x2FA20...0x2FFFD, // [W] Cn [1502] reserved-2FA20..reserved-2FFFD
        0x30000...0x3134A, // [W] Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
        0x3134B...0x3134F, // [W] Cn [5] reserved-3134B..reserved-3134F
        0x31350...0x323AF, // [W] Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
        0x323B0...0x3FFFD, // [W] Cn [56398] reserved-323B0..reserved-3FFFD
        => true,
        else => false,
    };
}

/// Returns true when `cp` is in the list below. The list looks like the
/// East Asian Width `A` (ambiguous) set - NOTE(review): confirm against
/// EastAsianWidth.txt when regenerating.
///
/// The misspelling in the name ("Amgiguous") is kept on purpose: it is part of
/// the public interface and callers reference it by this name.
pub fn isAmgiguousCodepointType(comptime T: type, cp: T) bool {
    return switch (cp) {
        0xA1,
        0xA4,
        0xA7,
        0xA8,
        0xAA,
        0xAD,
        0xAE,
        0xB0...0xB4,
        0xB6...0xBA,
        0xBC...0xBF,
        0xC6,
        0xD0,
        0xD7,
        0xD8,
        0xDE...0xE1,
        0xE6,
        0xE8...0xEA,
        0xEC,
        0xED,
        0xF0,
        0xF2,
        0xF3,
        0xF7...0xFA,
        0xFC,
        0xFE,
        0x101,
        0x111,
        0x113,
        0x11B,
        0x126,
        0x127,
        0x12B,
        0x131...0x133,
        0x138,
        0x13F...0x142,
        0x144,
        0x148...0x14B,
        0x14D,
        0x152,
        0x153,
        0x166,
        0x167,
        0x16B,
        0x1CE,
        0x1D0,
        0x1D2,
        0x1D4,
        0x1D6,
        0x1D8,
        0x1DA,
        0x1DC,
        0x251,
        0x261,
        0x2C4,
        0x2C7,
        0x2C9...0x2CB,
        0x2CD,
        0x2D0,
        0x2D8...0x2DB,
        0x2DD,
        0x2DF,
        0x300...0x36F,
        0x391...0x3A1,
        0x3A3...0x3A9,
        0x3B1...0x3C1,
        0x3C3...0x3C9,
        0x401,
        0x410...0x44F,
        0x451,
        0x2010,
        0x2013...0x2016,
        0x2018,
        0x2019,
        0x201C,
        0x201D,
        0x2020...0x2022,
        0x2024...0x2027,
        0x2030,
        0x2032,
        0x2033,
        0x2035,
        0x203B,
        0x203E,
        0x2074,
        0x207F,
        0x2081...0x2084,
        0x20AC,
        0x2103,
        0x2105,
        0x2109,
        0x2113,
        0x2116,
        0x2121,
        0x2122,
        0x2126,
        0x212B,
        0x2153,
        0x2154,
        0x215B...0x215E,
        0x2160...0x216B,
        0x2170...0x2179,
        0x2189,
        0x2190...0x2199,
        0x21B8,
        0x21B9,
        0x21D2,
        0x21D4,
        0x21E7,
        0x2200,
        0x2202,
        0x2203,
        0x2207,
        0x2208,
        0x220B,
        0x220F,
        0x2211,
        0x2215,
        0x221A,
        0x221D...0x2220,
        0x2223,
        0x2225,
        0x2227...0x222C,
        0x222E,
        0x2234...0x2237,
        0x223C,
        0x223D,
        0x2248,
        0x224C,
        0x2252,
        0x2260,
        0x2261,
        0x2264...0x2267,
        0x226A,
        0x226B,
        0x226E,
        0x226F,
        0x2282,
        0x2283,
        0x2286,
        0x2287,
        0x2295,
        0x2299,
        0x22A5,
        0x22BF,
        0x2312,
        0x2460...0x24E9,
        0x24EB...0x254B,
        0x2550...0x2573,
        0x2580...0x258F,
        0x2592...0x2595,
        0x25A0,
        0x25A1,
        0x25A3...0x25A9,
        0x25B2,
        0x25B3,
        0x25B6,
        0x25B7,
        0x25BC,
        0x25BD,
        0x25C0,
        0x25C1,
        0x25C6...0x25C8,
        0x25CB,
        0x25CE...0x25D1,
        0x25E2...0x25E5,
        0x25EF,
        0x2605,
        0x2606,
        0x2609,
        0x260E,
        0x260F,
        0x261C,
        0x261E,
        0x2640,
        0x2642,
        0x2660,
        0x2661,
        0x2663...0x2665,
        0x2667...0x266A,
        0x266C,
        0x266D,
        0x266F,
        0x269E,
        0x269F,
        0x26BF,
        0x26C6...0x26CD,
        0x26CF...0x26D3,
        0x26D5...0x26E1,
        0x26E3,
        0x26E8,
        0x26E9,
        0x26EB...0x26F1,
        0x26F4,
        0x26F6...0x26F9,
        0x26FB,
        0x26FC,
        0x26FE,
        0x26FF,
        0x273D,
        0x2776...0x277F,
        0x2B56...0x2B59,
        0x3248...0x324F,
        0xE000...0xF8FF,
        0xFE00...0xFE0F,
        0xFFFD,
        0x1F100...0x1F10A,
        0x1F110...0x1F12D,
        0x1F130...0x1F169,
        0x1F170...0x1F18D,
        0x1F18F,
        0x1F190,
        0x1F19B...0x1F1AC,
        0xE0100...0xE01EF,
        0xF0000...0xFFFFD,
        0x100000...0x10FFFD,
        => true,
        else => false,
    };
}

/// Column width (0, 1 or 2) of a single code point given as `u32`.
/// Thin wrapper over `visibleCodepointWidthType(u32, ...)`.
pub fn visibleCodepointWidth(cp: u32, ambiguousAsWide: bool) u3_fast {
    return visibleCodepointWidthType(u32, cp, ambiguousAsWide);
}

/// Like `visibleCodepointWidth`, but when `maybe_emoji` is set and ICU reports
/// the Emoji binary property for `cp`, the width is forced to 2.
pub fn visibleCodepointWidthMaybeEmoji(cp: u32, maybe_emoji: bool, ambiguousAsWide: bool) u3_fast {
    // UCHAR_EMOJI=57,
    if (maybe_emoji and icu_hasBinaryProperty(cp, 57)) {
        return 2;
    }
    return visibleCodepointWidth(cp, ambiguousAsWide);
}

/// Column width of `cp`:
///  - 0 if `isZeroWidthCodepointType` matches,
///  - 2 if `isFullWidthCodepointType` matches,
///  - 2 if `ambiguousAsWide` is set and `isAmgiguousCodepointType` matches,
///  - 1 otherwise.
/// The checks run in that order, so zero-width wins over the other tables.
pub fn visibleCodepointWidthType(comptime T: type, cp: T, ambiguousAsWide: bool) u3_fast {
    if (isZeroWidthCodepointType(T, cp)) {
        return 0;
    }

    if (isFullWidthCodepointType(T, cp)) {
        return 2;
    }
    if (ambiguousAsWide and isAmgiguousCodepointType(T, cp)) {
        return 2;
    }

    return 1;
}

/// Visible (terminal column) width measurement for Latin-1, UTF-8 and UTF-16
/// strings. The public entry points live in `visible.width`.
pub const visible = struct {
    // Ref: https://cs.stanford.edu/people/miles/iso8859.html
    /// Sums `visibleLatin1WidthScalar` over every byte of `input_`.
    /// Full 16-byte chunks are summed through a vector reduce; the remaining
    /// tail (fewer than 16 bytes) is handled byte by byte.
    fn visibleLatin1Width(input_: []const u8) usize {
        var length: usize = 0;
        var input = input_;
        // End of the last complete 16-byte chunk.
        const input_end_ptr = input.ptr + input.len - (input.len % 16);
        var input_ptr = input.ptr;
        while (input_ptr != input_end_ptr) {
            const input_chunk: [16]u8 = input_ptr[0..16].*;
            const sums: @Vector(16, u8) = [16]u8{
                visibleLatin1WidthScalar(input_chunk[0]),
                visibleLatin1WidthScalar(input_chunk[1]),
                visibleLatin1WidthScalar(input_chunk[2]),
                visibleLatin1WidthScalar(input_chunk[3]),
                visibleLatin1WidthScalar(input_chunk[4]),
                visibleLatin1WidthScalar(input_chunk[5]),
                visibleLatin1WidthScalar(input_chunk[6]),
                visibleLatin1WidthScalar(input_chunk[7]),
                visibleLatin1WidthScalar(input_chunk[8]),
                visibleLatin1WidthScalar(input_chunk[9]),
                visibleLatin1WidthScalar(input_chunk[10]),
                visibleLatin1WidthScalar(input_chunk[11]),
                visibleLatin1WidthScalar(input_chunk[12]),
                visibleLatin1WidthScalar(input_chunk[13]),
                visibleLatin1WidthScalar(input_chunk[14]),
                visibleLatin1WidthScalar(input_chunk[15]),
            };
            // Each lane is 0 or 1, so the u8 sum is at most 16.
            length += @reduce(.Add, sums);
            input_ptr += 16;
        }
        // Re-point `input` at the unprocessed tail.
        input.len %= 16;
        input.ptr = input_ptr;

        for (input) |byte| length += visibleLatin1WidthScalar(byte);
        return length;
    }

    /// Width of one Latin-1 byte: 0 for C0 controls (< 32) and for 127..159
    /// (DEL and C1 controls), 1 for everything else.
    fn visibleLatin1WidthScalar(c: u8) u1 {
        return if ((c >= 127 and c <= 159) or c < 32) 0 else 1;
    }

    /// Latin-1 width of `input_`, skipping `ESC [ ... m` sequences.
    ///
    /// - An ESC followed by `[` drops everything up to and including the next
    ///   `m`. If no `m` follows, the rest of the input is not counted.
    /// - An ESC followed by anything else is skipped on its own; the next
    ///   element is measured normally.
    ///
    /// NOTE(review): the body calls `visibleLatin1Width`, which takes
    /// `[]const u8`, so only byte slices can actually be instantiated even
    /// though an index helper for 16-bit elements is selected below.
    fn visibleLatin1WidthExcludeANSIColors(input_: anytype) usize {
        var length: usize = 0;
        var input = input_;

        const ElementType = std.meta.Child(@TypeOf(input_));
        const indexFn = if (comptime ElementType == u8) strings.indexOfCharUsize else strings.indexOfChar16Usize;

        while (indexFn(input, '\x1b')) |i| {
            length += visibleLatin1Width(input[0..i]);
            input = input[i..];

            // A lone trailing ESC has nothing after it and is itself
            // zero-width. Only `input[1]` must exist from here on; requiring a
            // third element (as this code used to) silently dropped the
            // visible character in a trailing "ESC x".
            if (input.len < 2) return length;

            if (input[1] == '[') {
                const end = indexFn(input[2..], 'm') orelse return length;
                // +3 = ESC, '[' and the terminating 'm'.
                input = input[end + 3 ..];
            } else {
                input = input[1..];
            }
        }

        length += visibleLatin1Width(input);

        return length;
    }

    /// Width of a UTF-8 (WTF-8) string.
    ///
    /// ASCII runs are measured with `asciiFn`; every non-ASCII sequence is
    /// decoded to a code point and measured with `visibleCodepointWidth`
    /// (ambiguous-width code points always count as narrow here). A sequence
    /// truncated by the end of input is zero-padded before decoding.
    fn visibleUTF8WidthFn(input: []const u8, comptime asciiFn: anytype) usize {
        var bytes = input;
        var len: usize = 0;
        while (bun.strings.firstNonASCII(bytes)) |i| {
            len += asciiFn(bytes[0..i]);
            const this_chunk = bytes[i..];
            const byte = this_chunk[0];

            const skip = bun.strings.wtf8ByteSequenceLengthWithInvalid(byte);
            // Copy at most `skip` bytes (bounded by what is left) into a
            // fixed 4-byte buffer; unused slots are 0.
            const cp_bytes: [4]u8 = switch (@min(@as(usize, skip), this_chunk.len)) {
                inline 1, 2, 3, 4 => |cp_len| .{
                    byte,
                    if (comptime cp_len > 1) this_chunk[1] else 0,
                    if (comptime cp_len > 2) this_chunk[2] else 0,
                    if (comptime cp_len > 3) this_chunk[3] else 0,
                },
                else => unreachable,
            };

            const cp = decodeWTF8RuneTMultibyte(&cp_bytes, skip, u32, unicode_replacement);
            len += visibleCodepointWidth(cp, false);

            bytes = bytes[@min(i + skip, bytes.len)..];
        }

        len += asciiFn(bytes);

        return len;
    }

    /// Width of a UTF-16 string, measured per grapheme cluster.
    ///
    /// A cluster contributes the width of its first code point
    /// (`break_start`). If the cluster ends with U+FE0F (VS16) and ICU reports
    /// the Emoji property for that first code point, it counts as 2 columns.
    /// With `exclude_ansi_colors`, ASCII `ESC [ ... m` sequences are skipped.
    ///
    /// NOTE(review): the escape-sequence handling has several spots that look
    /// unintended but are left as-is because the desired behaviour is not
    /// visible from this file:
    ///  - `saw_1b` is only cleared when a `[ ... m` sequence completes, so
    ///    after "ESC x" every later ASCII unit takes the `saw_1b` path.
    ///  - `stretch_len` is added to `len` after each ASCII run but not reset,
    ///    so an unterminated sequence spanning several runs is counted again.
    ///  - `prev` is overwritten by escape-sequence units, which affects the
    ///    VS16 check for a cluster that is followed by an escape sequence.
    fn visibleUTF16WidthFn(input_: []const u16, exclude_ansi_colors: bool, ambiguousAsWide: bool) usize {
        var input = input_;
        var len: usize = 0;
        // Starts at 0 (not null): the first real code point is compared
        // against NUL, whose width is 0, so the `else` arms below that handle
        // a null `prev` are never taken in practice.
        var prev: ?u21 = 0;
        var break_state = grapheme.BreakState{};
        // First code point of the cluster currently being accumulated.
        var break_start: u21 = 0;
        var saw_1b = false;
        var saw_bracket = false;
        // Width seen inside a not-yet-terminated `ESC [` sequence.
        var stretch_len: usize = 0;

        while (true) {
            {
                // Leading ASCII run.
                const idx = firstNonASCII16([]const u16, input) orelse input.len;
                for (0..idx) |j| {
                    const cp = input[j];
                    defer prev = cp;

                    if (saw_bracket) {
                        if (cp == 'm') {
                            // End of `ESC [ ... m`: discard what it contained.
                            saw_1b = false;
                            saw_bracket = false;
                            stretch_len = 0;
                            continue;
                        }
                        stretch_len += visibleCodepointWidth(cp, ambiguousAsWide);
                        continue;
                    }
                    if (saw_1b) {
                        if (cp == '[') {
                            saw_bracket = true;
                            stretch_len = 0;
                            continue;
                        }
                        len += visibleCodepointWidth(cp, ambiguousAsWide);
                        continue;
                    }
                    if (!exclude_ansi_colors or cp != 0x1b) {
                        if (prev) |prev_| {
                            const should_break = grapheme.graphemeBreak(prev_, cp, &break_state);
                            if (should_break) {
                                // The cluster being closed ended with `prev_`,
                                // so that is the unit to test for VS16 (the
                                // final flush after the loop tests `prev` the
                                // same way). `cp` already belongs to the next
                                // cluster.
                                len += visibleCodepointWidthMaybeEmoji(break_start, prev_ == 0xFE0F, ambiguousAsWide);
                                break_start = cp;
                            }
                        } else {
                            len += visibleCodepointWidth(cp, ambiguousAsWide);
                            break_start = cp;
                        }
                        continue;
                    }
                    saw_1b = true;
                    continue;
                }
                len += stretch_len;
                input = input[idx..];
            }
            if (input.len == 0) break;

            // One non-ASCII code point (possibly a surrogate pair).
            const replacement = utf16CodepointWithFFFD([]const u16, input);
            defer input = input[replacement.len..];
            if (replacement.fail) continue;
            const cp: u21 = @intCast(replacement.code_point);
            defer prev = cp;

            if (prev) |prev_| {
                const should_break = grapheme.graphemeBreak(prev_, cp, &break_state);
                if (should_break) {
                    // See the ASCII path: test the closed cluster's last unit.
                    len += visibleCodepointWidthMaybeEmoji(break_start, prev_ == 0xFE0F, ambiguousAsWide);
                    break_start = cp;
                }
            } else {
                len += visibleCodepointWidth(cp, ambiguousAsWide);
                break_start = cp;
            }
        }
        // Flush the last open cluster.
        if (break_start > 0) {
            len += visibleCodepointWidthMaybeEmoji(break_start, (prev orelse 0) == 0xFE0F, ambiguousAsWide);
        }
        return len;
    }

    /// Same as `visibleLatin1Width`.
    fn visibleLatin1WidthFn(input: []const u8) usize {
        return visibleLatin1Width(input);
    }

    /// Public entry points, one per encoding.
    pub const width = struct {
        /// Width of a Latin-1 string.
        pub fn latin1(input: []const u8) usize {
            return visibleLatin1Width(input);
        }

        /// Width of a UTF-8 string.
        pub fn utf8(input: []const u8) usize {
            return visibleUTF8WidthFn(input, visibleLatin1Width);
        }

        /// Width of a UTF-16 string.
        pub fn utf16(input: []const u16, ambiguousAsWide: bool) usize {
            return visibleUTF16WidthFn(input, false, ambiguousAsWide);
        }

        /// Same measurements, with `ESC [ ... m` sequences skipped.
        pub const exclude_ansi_colors = struct {
            pub fn latin1(input: []const u8) usize {
                return visibleLatin1WidthExcludeANSIColors(input);
            }

            pub fn utf8(input: []const u8) usize {
                return visibleUTF8WidthFn(input, visibleLatin1WidthExcludeANSIColors);
            }

            pub fn utf16(input: []const u16, ambiguousAsWide: bool) usize {
                return visibleUTF16WidthFn(input, true, ambiguousAsWide);
            }
        };
    };
};

// extern "C" bool icu_hasBinaryProperty(UChar32 cp, unsigned int prop)
extern fn icu_hasBinaryProperty(c: u32, which: c_uint) bool;

const bun = @import("bun");
const std = @import("std");

const u3_fast = strings.u3_fast;
const decodeWTF8RuneTMultibyte = strings.decodeWTF8RuneTMultibyte;
const grapheme = strings.grapheme;
const strings = bun.strings;
const unicode_replacement = strings.unicode_replacement;
const firstNonASCII16 = strings.firstNonASCII16;
const firstNonASCII = strings.firstNonASCII;
const utf16CodepointWithFFFD = strings.utf16CodepointWithFFFD;