improve Bun.stringWidth's algorithm (#9022)

* improve Bun.stringWidth's algorithm

* add a bunch more tests from string-width package

* make TypeScript happy

* undo TypeScript changes

* use better #define check for debug mode

* properly handle latin1 width tests

* support grapheme clusters

* fix trailing newline

* visibleUTF16WidthFn: add fast path for leading ASCII

* add firstNonASCII16IgnoreMin

* fix firstNonASCII16CheckMin

* vectorize visibleUTF16WidthFn

* support emoji variation selector

* expose stringWidth in release mode too

* vectorize visibleLatin1Width

* support ambiguousIsNarrow option

* add TypeScript definition for stringWidth
This commit is contained in:
Meghan Denny
2024-02-22 19:16:17 -08:00
committed by GitHub
parent 22c25fad92
commit ed339b367d
13 changed files with 2238 additions and 100 deletions

View File

@@ -370,7 +370,7 @@ pub const String = extern struct {
pub fn createUTF16(bytes: []const u16) String {
if (bytes.len == 0) return String.empty;
if (bun.strings.firstNonASCII16CheckMin([]const u16, bytes, false) == null) {
if (bun.strings.firstNonASCII16IgnoreMin([]const u16, bytes) == null) {
return BunString__fromUTF16ToLatin1(bytes.ptr, bytes.len);
}
return BunString__fromUTF16(bytes.ptr, bytes.len);
@@ -938,23 +938,23 @@ pub const String = extern struct {
};
}
pub fn visibleWidth(this: *const String) usize {
pub fn visibleWidth(this: *const String, ambiguousAsWide: bool) usize {
if (this.isUTF8()) {
return bun.strings.visible.width.utf8(this.utf8());
} else if (this.isUTF16()) {
return bun.strings.visible.width.utf16(this.utf16());
return bun.strings.visible.width.utf16(this.utf16(), ambiguousAsWide);
} else {
return bun.strings.visible.width.ascii(this.latin1());
return bun.strings.visible.width.latin1(this.latin1());
}
}
pub fn visibleWidthExcludeANSIColors(this: *const String) usize {
pub fn visibleWidthExcludeANSIColors(this: *const String, ambiguousAsWide: bool) usize {
if (this.isUTF8()) {
return bun.strings.visible.width.exclude_ansi_colors.utf8(this.utf8());
} else if (this.isUTF16()) {
return bun.strings.visible.width.exclude_ansi_colors.utf16(this.utf16());
return bun.strings.visible.width.exclude_ansi_colors.utf16(this.utf16(), ambiguousAsWide);
} else {
return bun.strings.visible.width.exclude_ansi_colors.ascii(this.latin1());
return bun.strings.visible.width.exclude_ansi_colors.latin1(this.latin1());
}
}
@@ -1166,7 +1166,7 @@ pub const String = extern struct {
return JSC.jsNumber(@as(i32, 0));
}
const width = str.visibleWidth();
const width = str.visibleWidth(false);
return JSC.jsNumber(width);
}
};