mirror of
https://github.com/oven-sh/bun
synced 2026-02-11 19:38:58 +00:00
Introduce Bun.stringWidth (#8327)
* Introduce `Bun.stringWidth` * [autofix.ci] apply automated fixes * Update utils.md --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -3793,6 +3793,10 @@ pub fn indexOfCharUsize(slice: []const u8, char: u8) ?usize {
|
||||
return i;
|
||||
}
|
||||
|
||||
pub fn indexOfChar16Usize(slice: []const u16, char: u16) ?usize {
|
||||
return std.mem.indexOfScalar(u16, slice, char);
|
||||
}
|
||||
|
||||
test "indexOfChar" {
|
||||
const pairs = .{
|
||||
.{
|
||||
@@ -5659,89 +5663,145 @@ pub fn visibleCodepointWidthType(comptime T: type, cp: T) usize {
|
||||
return 1;
|
||||
}
|
||||
|
||||
pub fn visibleASCIIWidth(input_: anytype) usize {
|
||||
var length: usize = 0;
|
||||
var input = input_;
|
||||
pub const visible = struct {
|
||||
fn visibleASCIIWidth(input_: anytype) usize {
|
||||
var length: usize = 0;
|
||||
var input = input_;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
// https://zig.godbolt.org/z/hxhjncvq7
|
||||
const ElementType = std.meta.Child(@TypeOf(input_));
|
||||
const simd = 16 / @sizeOf(ElementType);
|
||||
if (input.len >= simd) {
|
||||
const input_end = input.ptr + input.len - (input.len % simd);
|
||||
while (input.ptr != input_end) {
|
||||
const chunk: @Vector(simd, ElementType) = input[0..simd].*;
|
||||
input = input[simd..];
|
||||
|
||||
const cmp: @Vector(simd, ElementType) = @splat(0x1f);
|
||||
const match1: @Vector(simd, u1) = @bitCast(chunk >= cmp);
|
||||
const match: @Vector(simd, ElementType) = match1;
|
||||
|
||||
length += @reduce(.Add, match);
|
||||
}
|
||||
}
|
||||
|
||||
// this is a deliberate compiler optimization
|
||||
// it disables auto-vectorizing the "input" for loop.
|
||||
if (!(input.len < simd)) unreachable;
|
||||
}
|
||||
|
||||
for (input) |c| {
|
||||
length += if (c > 0x1f) 1 else 0;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
fn visibleASCIIWidthExcludeANSIColors(input_: anytype) usize {
|
||||
var length: usize = 0;
|
||||
var input = input_;
|
||||
|
||||
if (comptime Environment.enableSIMD) {
|
||||
// https://zig.godbolt.org/z/hxhjncvq7
|
||||
const ElementType = std.meta.Child(@TypeOf(input_));
|
||||
const simd = 16 / @sizeOf(ElementType);
|
||||
if (input.len >= simd) {
|
||||
const input_end = input.ptr + input.len - (input.len % simd);
|
||||
while (input.ptr != input_end) {
|
||||
const chunk: @Vector(simd, ElementType) = input[0..simd].*;
|
||||
input = input[simd..];
|
||||
const indexFn = if (comptime ElementType == u8) strings.indexOfCharUsize else strings.indexOfChar16Usize;
|
||||
|
||||
const cmp: @Vector(simd, ElementType) = @splat(0x1f);
|
||||
const match1: @Vector(simd, u1) = @bitCast(chunk >= cmp);
|
||||
const match: @Vector(simd, ElementType) = match1;
|
||||
while (indexFn(input, '\x1b')) |i| {
|
||||
length += visibleASCIIWidth(input[0..i]);
|
||||
input = input[i..];
|
||||
|
||||
length += @reduce(.Add, match);
|
||||
if (input.len < 3) return length;
|
||||
|
||||
if (input[1] == '[') {
|
||||
const end = indexFn(input[2..], 'm') orelse return length;
|
||||
input = input[end + 3 ..];
|
||||
} else {
|
||||
input = input[1..];
|
||||
}
|
||||
}
|
||||
|
||||
// this is a deliberate compiler optimization
|
||||
// it disables auto-vectorizing the "input" for loop.
|
||||
if (!(input.len < simd)) unreachable;
|
||||
length += visibleASCIIWidth(input);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
for (input) |c| {
|
||||
length += if (c > 0x1f) 1 else 0;
|
||||
fn visibleUTF8WidthFn(input: []const u8, comptime asciiFn: anytype) usize {
|
||||
var bytes = input;
|
||||
var len: usize = 0;
|
||||
while (bun.strings.firstNonASCII(bytes)) |i| {
|
||||
len += asciiFn(bytes[0..i]);
|
||||
|
||||
const byte = bytes[i];
|
||||
const skip = bun.strings.wtf8ByteSequenceLengthWithInvalid(byte);
|
||||
const cp_bytes: [4]u8 = switch (skip) {
|
||||
inline 1, 2, 3, 4 => |cp_len| .{
|
||||
byte,
|
||||
if (comptime cp_len > 1) bytes[1] else 0,
|
||||
if (comptime cp_len > 2) bytes[2] else 0,
|
||||
if (comptime cp_len > 3) bytes[3] else 0,
|
||||
},
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
const cp = decodeWTF8RuneTMultibyte(&cp_bytes, skip, u32, unicode_replacement);
|
||||
len += visibleCodepointWidthType(u32, cp);
|
||||
|
||||
bytes = bytes[@min(i + skip, bytes.len)..];
|
||||
}
|
||||
|
||||
len += asciiFn(bytes);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
fn visibleUTF16WidthFn(input: []const u16, comptime asciiFn: anytype) usize {
|
||||
var bytes = input;
|
||||
var len: usize = 0;
|
||||
while (bun.strings.firstNonASCII16CheckMin([]const u16, bytes, false)) |i| {
|
||||
len += asciiFn(bytes[0..i]);
|
||||
bytes = bytes[i..];
|
||||
|
||||
pub fn visibleUTF8Width(input: []const u8) usize {
|
||||
var bytes = input;
|
||||
var len: usize = 0;
|
||||
while (bun.strings.firstNonASCII(bytes)) |i| {
|
||||
len += visibleASCIIWidth(bytes[0..i]);
|
||||
const utf8 = utf16CodepointWithFFFD([]const u16, bytes);
|
||||
len += visibleCodepointWidthType(u32, utf8.code_point);
|
||||
bytes = bytes[@min(@as(usize, utf8.len), bytes.len)..];
|
||||
}
|
||||
|
||||
const byte = bytes[i];
|
||||
const skip = bun.strings.wtf8ByteSequenceLengthWithInvalid(byte);
|
||||
const cp_bytes: [4]u8 = switch (skip) {
|
||||
inline 1, 2, 3, 4 => |cp_len| .{
|
||||
byte,
|
||||
if (comptime cp_len > 1) bytes[1] else 0,
|
||||
if (comptime cp_len > 2) bytes[2] else 0,
|
||||
if (comptime cp_len > 3) bytes[3] else 0,
|
||||
},
|
||||
else => unreachable,
|
||||
len += asciiFn(bytes);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
fn visibleLatin1WidthFn(input: []const u8) usize {
|
||||
return visibleASCIIWidth(input);
|
||||
}
|
||||
|
||||
pub const width = struct {
|
||||
pub fn ascii(input: []const u8) usize {
|
||||
return visibleASCIIWidth(input);
|
||||
}
|
||||
|
||||
pub fn utf8(input: []const u8) usize {
|
||||
return visibleUTF8WidthFn(input, visibleASCIIWidth);
|
||||
}
|
||||
|
||||
pub fn utf16(input: []const u16) usize {
|
||||
return visibleUTF16WidthFn(input, visibleASCIIWidth);
|
||||
}
|
||||
|
||||
pub const exclude_ansi_colors = struct {
|
||||
pub fn ascii(input: []const u8) usize {
|
||||
return visibleASCIIWidthExcludeANSIColors(input);
|
||||
}
|
||||
|
||||
pub fn utf8(input: []const u8) usize {
|
||||
return visibleUTF8WidthFn(input, visibleASCIIWidthExcludeANSIColors);
|
||||
}
|
||||
|
||||
pub fn utf16(input: []const u16) usize {
|
||||
return visibleUTF16WidthFn(input, visibleASCIIWidthExcludeANSIColors);
|
||||
}
|
||||
};
|
||||
|
||||
const cp = decodeWTF8RuneTMultibyte(&cp_bytes, skip, u32, unicode_replacement);
|
||||
len += visibleCodepointWidthType(u32, cp);
|
||||
|
||||
bytes = bytes[@min(i + skip, bytes.len)..];
|
||||
}
|
||||
|
||||
len += visibleASCIIWidth(bytes);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
pub fn visibleUTF16Width(input: []const u16) usize {
|
||||
var bytes = input;
|
||||
var len: usize = 0;
|
||||
while (bun.strings.firstNonASCII16CheckMin([]const u16, bytes, false)) |i| {
|
||||
len += visibleASCIIWidth(bytes[0..i]);
|
||||
bytes = bytes[i..];
|
||||
|
||||
const utf8 = utf16CodepointWithFFFD([]const u16, bytes);
|
||||
len += visibleCodepointWidthType(u32, utf8.code_point);
|
||||
bytes = bytes[@min(@as(usize, utf8.len), bytes.len)..];
|
||||
}
|
||||
|
||||
len += visibleASCIIWidth(bytes);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
pub fn visibleLatin1Width(input: []const u8) usize {
|
||||
return visibleASCIIWidth(input);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
pub const QuoteEscapeFormat = struct {
|
||||
data: []const u8,
|
||||
|
||||
Reference in New Issue
Block a user