mirror of
https://github.com/oven-sh/bun
synced 2026-02-10 10:58:56 +00:00
refactor: move string-like structs into string module (#17369)
This commit is contained in:
43
src/string/HashedString.zig
Normal file
43
src/string/HashedString.zig
Normal file
@@ -0,0 +1,43 @@
|
||||
const HashedString = @This();
|
||||
const bun = @import("root").bun;
|
||||
|
||||
ptr: [*]const u8,
|
||||
len: u32,
|
||||
hash: u32,
|
||||
|
||||
/// Zero-length, unhashed value. `ptr` is the placeholder address 0xDEADBEEF.
pub const empty = HashedString{
    .ptr = @as([*]const u8, @ptrFromInt(0xDEADBEEF)),
    .len = 0,
    .hash = 0,
};
|
||||
|
||||
/// Creates a `HashedString` that points at `buf` (no copy) and stores
/// `bun.hash(buf)` truncated to 32 bits. The length is truncated to `u32`.
pub fn init(buf: []const u8) HashedString {
    const byte_len: u32 = @truncate(buf.len);
    const digest: u32 = @truncate(bun.hash(buf));
    return .{
        .ptr = buf.ptr,
        .len = byte_len,
        .hash = digest,
    };
}
|
||||
|
||||
/// Creates a `HashedString` that points at `buf` (no copy) without hashing it;
/// `hash` is left as 0. The length is truncated to `u32`.
pub fn initNoHash(buf: []const u8) HashedString {
    const byte_len: u32 = @truncate(buf.len);
    return .{
        .ptr = buf.ptr,
        .len = byte_len,
        .hash = 0,
    };
}
|
||||
|
||||
/// Compares `this` with `other`, dispatching on the compile-time type of `other`.
pub fn eql(this: HashedString, other: anytype) bool {
    const Other = @TypeOf(other);
    return Eql(this, Other, other);
}
|
||||
|
||||
/// Type-directed equality used by `eql`.
///
/// For `HashedString` (by value or pointer): lengths must match, and either the
/// hashes are equal with at least one of them non-zero, or both point at the
/// same address.
/// For anything else: `other` is treated as a byte sequence; lengths must match
/// and its truncated `bun.hash` must equal `this.hash`. Contents are not
/// compared byte-by-byte.
fn Eql(this: HashedString, comptime Other: type, other: Other) bool {
    const is_hashed_string = comptime (Other == HashedString or
        Other == *HashedString or
        Other == *const HashedString);

    if (comptime is_hashed_string) {
        const any_hash_set = this.hash != 0 or other.hash != 0;
        const same_hash = any_hash_set and this.hash == other.hash;
        const same_identity = same_hash or this.ptr == other.ptr;
        return same_identity and this.len == other.len;
    } else {
        if (@as(usize, this.len) != other.len) return false;
        const other_hash: u32 = @truncate(bun.hash(other[0..other.len]));
        return other_hash == this.hash;
    }
}
|
||||
|
||||
/// Returns the bytes this value points at as a slice of `len` bytes.
pub fn str(this: HashedString) []const u8 {
    const byte_len: usize = this.len;
    return this.ptr[0..byte_len];
}
|
||||
452
src/string/MutableString.zig
Normal file
452
src/string/MutableString.zig
Normal file
@@ -0,0 +1,452 @@
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
const strings = bun.strings;
|
||||
const js_lexer = bun.js_lexer;
|
||||
const string = bun.string;
|
||||
const stringZ = bun.stringZ;
|
||||
const CodePoint = bun.CodePoint;
|
||||
|
||||
const MutableString = @This();
|
||||
|
||||
allocator: Allocator,
|
||||
list: std.ArrayListUnmanaged(u8),
|
||||
|
||||
/// Creates a `MutableString` with 2048 bytes of capacity requested up front.
pub fn init2048(allocator: Allocator) Allocator.Error!MutableString {
    const initial_capacity: usize = 2048;
    return MutableString.init(allocator, initial_capacity);
}
|
||||
|
||||
/// Returns a new `MutableString` containing a copy of this string's bytes,
/// allocated with the same allocator.
pub fn clone(self: *MutableString) Allocator.Error!MutableString {
    const bytes = self.list.items;
    return MutableString.initCopy(self.allocator, bytes);
}
|
||||
|
||||
/// `std.io.Writer` adapter whose write function is `MutableString.writeAll`.
pub const Writer = std.io.Writer(*@This(), Allocator.Error, MutableString.writeAll);

/// Returns a `Writer` that appends everything written to this string.
pub fn writer(self: *MutableString) Writer {
    return .{ .context = self };
}
|
||||
|
||||
/// True when the string currently holds no bytes (capacity is not considered).
pub fn isEmpty(this: *const MutableString) bool {
    const byte_count = this.list.items.len;
    return byte_count == 0;
}
|
||||
|
||||
/// Frees the backing buffer, if one was allocated. Safe to call on a string
/// that never allocated (capacity 0).
pub fn deinit(str: *MutableString) void {
    if (str.list.capacity == 0) return;

    str.list.expandToCapacity();
    str.list.clearAndFree(str.allocator);
}
|
||||
|
||||
/// Reports whether `items` lies inside this string's allocated buffer
/// (the full capacity, not just the used portion).
pub fn owns(this: *const MutableString, items: []const u8) bool {
    const backing = this.list.items.ptr[0..this.list.capacity];
    return bun.isSliceInBuffer(items, backing);
}
|
||||
|
||||
/// Ensures at least `amount` more bytes can be appended without reallocating.
pub fn growIfNeeded(self: *MutableString, amount: usize) Allocator.Error!void {
    return self.list.ensureUnusedCapacity(self.allocator, amount);
}
|
||||
|
||||
/// Appends `bytes` and returns how many bytes were appended.
/// Debug-asserts that a non-empty `bytes` does not alias this string's own
/// allocated buffer.
pub fn write(self: *MutableString, bytes: anytype) Allocator.Error!usize {
    const aliases_own_buffer = bytes.len != 0 and
        bun.isSliceInBuffer(bytes, self.list.allocatedSlice());
    bun.debugAssert(!aliases_own_buffer);

    try self.list.appendSlice(self.allocator, bytes);
    return bytes.len;
}
|
||||
|
||||
/// Returns a `BufferedWriter` that stages writes before appending them to this string.
pub fn bufferedWriter(self: *MutableString) BufferedWriter {
    return .{ .context = self };
}
|
||||
|
||||
/// Creates an empty `MutableString`. When `capacity` is non-zero the backing
/// buffer is allocated immediately; otherwise no allocation happens.
pub fn init(allocator: Allocator, capacity: usize) Allocator.Error!MutableString {
    if (capacity == 0) {
        return MutableString{ .allocator = allocator, .list = std.ArrayListUnmanaged(u8){} };
    }

    const preallocated = try std.ArrayListUnmanaged(u8).initCapacity(allocator, capacity);
    return MutableString{ .allocator = allocator, .list = preallocated };
}
|
||||
|
||||
/// Creates an empty `MutableString` without allocating.
pub fn initEmpty(allocator: Allocator) MutableString {
    return .{
        .allocator = allocator,
        .list = std.ArrayListUnmanaged(u8){},
    };
}
|
||||
|
||||
/// Alias of `growIfNeeded`.
pub const ensureUnusedCapacity = growIfNeeded;
|
||||
|
||||
/// Creates a `MutableString` holding a copy of `str`.
/// The buffer allocated here is released again if the copy step fails.
pub fn initCopy(allocator: Allocator, str: anytype) Allocator.Error!MutableString {
    var mutable = try MutableString.init(allocator, str.len);
    // Without this, a failing `copy` would leak the buffer allocated above.
    errdefer mutable.deinit();

    try mutable.copy(str);
    return mutable;
}
|
||||
|
||||
/// Convert it to an ASCII identifier. Note: If you change this to a non-ASCII
/// identifier, you're going to potentially cause trouble with non-BMP code
/// points in target environments that don't support bracketed Unicode escapes.
///
/// Returns `str` itself (or its remapped form from
/// `StrictModeReservedWordsRemap`) when no rewriting is needed, the literal
/// "_" for empty input, and otherwise a new slice allocated with `allocator`
/// in which each run of rejected code points is replaced by a single `_`.
/// Callers therefore cannot assume the result is always heap-allocated.
pub fn ensureValidIdentifier(str: string, allocator: Allocator) Allocator.Error!string {
    if (str.len == 0) {
        return "_";
    }

    var iterator = strings.CodepointIterator.init(str);
    var cursor = strings.CodepointIterator.Cursor{};

    if (!iterator.next(&cursor)) return "_";

    const JSLexerTables = @import("../js_lexer_tables.zig");

    // Common case: no gap necessary. No allocation necessary.
    var needs_gap = !js_lexer.isIdentifierStart(cursor.c);
    var start_i: usize = 0;
    if (!needs_gap) {
        // Are there any non-alphanumeric chars at all?
        while (iterator.next(&cursor)) {
            if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) {
                needs_gap = true;
                start_i = cursor.i;
                break;
            }
        }
    }

    if (!needs_gap) {
        return JSLexerTables.StrictModeReservedWordsRemap.get(str) orelse str;
    }

    var mutable = try MutableString.initCopy(allocator, if (start_i == 0)
        // the first letter can be a non-identifier start
        // https://github.com/oven-sh/bun/issues/2946
        "_"
    else
        str[0..start_i]);
    // Any failing append below must not leak the partially built buffer.
    errdefer mutable.deinit();

    needs_gap = false;

    const items = str[start_i..];
    iterator = strings.CodepointIterator.init(items);
    cursor = strings.CodepointIterator.Cursor{};

    while (iterator.next(&cursor)) {
        if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) {
            if (needs_gap) {
                try mutable.appendChar('_');
                needs_gap = false;
            }
            try mutable.append(items[cursor.i .. cursor.i + @as(u32, cursor.width)]);
        } else if (!needs_gap) {
            // skip the code point, replace it with a single _
            needs_gap = true;
        }
    }

    // If it ends with an emoji
    if (needs_gap) {
        try mutable.appendChar('_');
    }

    if (comptime bun.Environment.allow_assert) {
        bun.assert(js_lexer.isIdentifier(mutable.list.items));
    }

    return try mutable.list.toOwnedSlice(allocator);
}
|
||||
|
||||
/// Number of bytes currently stored.
pub fn len(self: *const MutableString) usize {
    const stored = self.list.items;
    return stored.len;
}
|
||||
|
||||
/// Writes `str` over the start of this string.
///
/// If the string is empty, it becomes a copy of `str`. If it already holds at
/// least `str.len` bytes, the first `str.len` bytes are overwritten and the
/// rest is kept. If it holds fewer bytes than `str`, the whole content is
/// replaced by `str`; previously this case sliced past the end of the stored
/// bytes inside `replaceRange`.
pub fn copy(self: *MutableString, str: anytype) Allocator.Error!void {
    const incoming = str[0..];
    try self.list.ensureTotalCapacity(self.allocator, incoming.len);

    // Never ask replaceRange to replace more bytes than are actually stored.
    // With zero bytes stored this degenerates to an insert at index 0.
    const overwritten = @min(self.list.items.len, incoming.len);
    try self.list.replaceRange(self.allocator, 0, overwritten, incoming);
}
|
||||
|
||||
/// Ensures room for `amount` additional bytes beyond the current length.
pub inline fn growBy(self: *MutableString, amount: usize) Allocator.Error!void {
    return self.list.ensureUnusedCapacity(self.allocator, amount);
}
|
||||
|
||||
/// Appends `items` to the end of the string, growing the buffer as needed.
pub inline fn appendSlice(self: *MutableString, items: []const u8) Allocator.Error!void {
    return self.list.appendSlice(self.allocator, items);
}
|
||||
|
||||
/// Appends `items`, requesting precisely `len + items.len` total capacity
/// (via `ensureTotalCapacityPrecise`) rather than amortized growth.
/// Does nothing for an empty `items`.
pub inline fn appendSliceExact(self: *MutableString, items: []const u8) Allocator.Error!void {
    if (items.len == 0) return;

    const old_len = self.list.items.len;
    try self.list.ensureTotalCapacityPrecise(self.allocator, old_len + items.len);

    // Extend the length first, then fill the newly exposed tail.
    self.list.items.len = old_len + items.len;
    @memcpy(self.list.items[old_len..], items);
}
|
||||
|
||||
/// Empties the string while keeping its allocated capacity.
pub inline fn reset(self: *MutableString) void {
    self.list.clearRetainingCapacity();
}
|
||||
|
||||
/// Sets the length to `index` without touching the buffer contents.
/// Asserts that `index` does not exceed the allocated capacity.
pub inline fn resetTo(self: *MutableString, index: usize) void {
    const within_capacity = index <= self.list.capacity;
    bun.assert(within_capacity);
    self.list.items.len = index;
}
|
||||
|
||||
/// Resizes the string so that its length is exactly `amount` bytes.
pub fn inflate(self: *MutableString, amount: usize) Allocator.Error!void {
    return self.list.resize(self.allocator, amount);
}
|
||||
|
||||
/// Appends the byte `char` repeated `n` times.
pub inline fn appendCharNTimes(self: *MutableString, char: u8, n: usize) Allocator.Error!void {
    return self.list.appendNTimes(self.allocator, char, n);
}
|
||||
|
||||
/// Appends a single byte, growing the buffer if required.
pub inline fn appendChar(self: *MutableString, char: u8) Allocator.Error!void {
    return self.list.append(self.allocator, char);
}
|
||||
/// Appends a single byte without allocating; the caller must have reserved room.
pub inline fn appendCharAssumeCapacity(self: *MutableString, char: u8) void {
    const list = &self.list;
    list.appendAssumeCapacity(char);
}
|
||||
/// Appends the bytes in `char` (a slice, despite the name) to the string.
pub inline fn append(self: *MutableString, char: []const u8) Allocator.Error!void {
    return self.list.appendSlice(self.allocator, char);
}
|
||||
/// Appends `int` formatted as lowercase base-10 digits.
/// Room is reserved from `bun.fmt.fastDigitCount(int)`, and the number of
/// bytes actually written is asserted to match that count.
pub inline fn appendInt(self: *MutableString, int: u64) Allocator.Error!void {
    const digits = bun.fmt.fastDigitCount(int);
    try self.list.ensureUnusedCapacity(self.allocator, digits);

    const start = self.list.items.len;
    self.list.items.len += digits;

    const destination = self.list.items.ptr[start .. start + digits];
    const written = std.fmt.formatIntBuf(destination, int, 10, .lower, .{});
    bun.assert(digits == written);
}
|
||||
|
||||
/// Appends the bytes in `char` without allocating; the caller must have reserved room.
pub inline fn appendAssumeCapacity(self: *MutableString, char: []const u8) void {
    self.list.appendSliceAssumeCapacity(char);
}
|
||||
/// Current length as an `i32`. Uses `@intCast`, so the length must fit in `i32`.
pub inline fn lenI(self: *MutableString) i32 {
    const signed_len: i32 = @intCast(self.list.items.len);
    return signed_len;
}
|
||||
|
||||
/// Hands the contents over to the caller as an owned slice, leaving this string
/// empty. Allocation failure is routed to `bun.outOfMemory()`.
pub fn toOwnedSlice(self: *MutableString) string {
    const owned = self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO
    return owned;
}
|
||||
|
||||
/// Returns the stored bytes as a mutable slice that still belongs to this string.
pub fn slice(self: *MutableString) []u8 {
    const stored: []u8 = self.list.items;
    return stored;
}
|
||||
|
||||
/// Detaches and returns the stored bytes, then resets this string to an empty
/// list. Nothing is freed here. The returned slice covers only the used bytes
/// (`items`), not the full capacity.
/// NOTE(review): the caller presumably becomes responsible for the memory — confirm at call sites.
pub fn move(self: *MutableString) []u8 {
    const detached = self.list.items;
    self.list = std.ArrayListUnmanaged(u8){};
    return detached;
}
|
||||
|
||||
/// Returns the contents as a NUL-terminated slice, appending a `0` byte first
/// when the string is empty or does not already end in one.
///
/// The terminator is stored in the list, so `len()` includes it afterwards;
/// the returned slice excludes it. Allocation failure is routed to
/// `bun.outOfMemory()`, matching `toOwnedSlice`.
pub fn sliceWithSentinel(self: *MutableString) [:0]u8 {
    const stored = self.list.items;

    // An empty string needs a terminator too; otherwise `len - 1` below underflows.
    if (stored.len == 0 or stored[stored.len - 1] != 0) {
        self.list.append(self.allocator, 0) catch bun.outOfMemory();
    }

    return self.list.items[0 .. self.list.items.len - 1 :0];
}
|
||||
|
||||
/// Shrinks the string to `length` bytes (releasing excess memory) and then
/// hands the result to the caller as an owned slice. Allocation failure is
/// routed to `bun.outOfMemory()`.
pub fn toOwnedSliceLength(self: *MutableString, length: usize) string {
    self.list.shrinkAndFree(self.allocator, length);
    const owned = self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO
    return owned;
}
|
||||
|
||||
/// True when the byte `char` occurs anywhere in the string.
pub fn containsChar(self: *const MutableString, char: u8) bool {
    const found = self.indexOfChar(char);
    return found != null;
}
|
||||
|
||||
/// Index of the first occurrence of `char`, or null when absent.
pub fn indexOfChar(self: *const MutableString, char: u8) ?u32 {
    const haystack = self.list.items;
    return strings.indexOfChar(haystack, char);
}
|
||||
|
||||
/// Index of the last occurrence of `char`, or null when absent.
pub fn lastIndexOfChar(self: *const MutableString, char: u8) ?usize {
    const haystack = self.list.items;
    return strings.lastIndexOfChar(haystack, char);
}
|
||||
|
||||
/// Index of the last occurrence of the byte `str`, or null when absent.
/// Despite the parameter name, this searches for a single byte.
pub fn lastIndexOf(self: *const MutableString, str: u8) ?usize {
    const haystack = self.list.items;
    return strings.lastIndexOfChar(haystack, str);
}
|
||||
|
||||
/// Index of the first occurrence of the byte `str`, or null when absent.
///
/// The needle is a single byte, so this uses `std.mem.indexOfScalar`;
/// `std.mem.indexOf` takes a slice needle and does not accept a `u8`.
pub fn indexOf(self: *const MutableString, str: u8) ?usize {
    return std.mem.indexOfScalar(u8, self.list.items, str);
}
|
||||
|
||||
/// Byte-wise equality between this string's contents and `other`.
pub fn eql(self: *MutableString, other: anytype) bool {
    const mine = self.list.items;
    return std.mem.eql(u8, mine, other);
}
|
||||
|
||||
/// Builds `count` `iovec_const` entries, one per element of `ranges`.
/// Each range `r` selects the bytes `items[r[0]..r[1]]` of this string; the
/// entries point into this string's buffer and copy nothing.
pub fn toSocketBuffers(self: *MutableString, comptime count: usize, ranges: anytype) [count]std.posix.iovec_const {
    var buffers: [count]std.posix.iovec_const = undefined;

    inline for (&buffers, ranges) |*entry, range| {
        const part = self.list.items[range[0]..range[1]];
        entry.* = .{
            .iov_base = part.ptr,
            .iov_len = part.len,
        };
    }

    return buffers;
}
|
||||
|
||||
/// Stages writes in a fixed 2048-byte buffer in front of a `MutableString`.
/// Buffered bytes reach `context` only on `flush`, or when a write is large
/// enough to bypass the buffer (which flushes first to preserve ordering).
pub const BufferedWriter = struct {
    context: *MutableString,
    buffer: [max]u8 = undefined,
    pos: usize = 0,

    const max = 2048;

    pub const Writer = std.io.Writer(*BufferedWriter, Allocator.Error, BufferedWriter.writeAll);

    /// The unused tail of the staging buffer.
    inline fn remain(this: *BufferedWriter) []u8 {
        return this.buffer[this.pos..];
    }

    /// Appends everything staged so far to `context` and empties the buffer.
    pub fn flush(this: *BufferedWriter) Allocator.Error!void {
        _ = try this.context.writeAll(this.buffer[0..this.pos]);
        this.pos = 0;
    }

    /// Writes `bytes`. Inputs of `max` bytes or more skip the staging buffer
    /// and go straight to `context`. Returns `bytes.len`.
    pub fn writeAll(this: *BufferedWriter, bytes: []const u8) Allocator.Error!usize {
        const pending = bytes;

        if (pending.len >= max) {
            try this.flush();
            try this.context.append(pending);
            return pending.len;
        }

        if (pending.len > 0) {
            if (pending.len + this.pos > max) {
                try this.flush();
            }
            @memcpy(this.remain()[0..pending.len], pending);
            this.pos += pending.len;
        }

        return pending.len;
    }

    const E = bun.JSAst.E;

    /// Write a E.String to the buffer.
    /// This automatically encodes UTF-16 into UTF-8 using
    /// the same code path as TextEncoder
    pub fn writeString(this: *BufferedWriter, bytes: *E.String) Allocator.Error!usize {
        if (bytes.isUTF8()) {
            return try this.writeAll(bytes.slice(this.context.allocator));
        }

        return try this.writeAll16(bytes.slice16());
    }

    /// Write a UTF-16 string to the (UTF-8) buffer
    /// This automatically encodes UTF-16 into UTF-8 using
    /// the same code path as TextEncoder
    ///
    /// Returns the number of UTF-16 code units consumed (`bytes.len`).
    pub fn writeAll16(this: *BufferedWriter, bytes: []const u16) Allocator.Error!usize {
        if (bytes.len == 0) return 0;

        // One UTF-16 code unit encodes to at most 3 UTF-8 bytes (a surrogate
        // pair is two units for four bytes), so this bounds the output size.
        const worst_case = bytes.len * 3;

        if (worst_case >= max) {
            // Too large for the staging buffer: encode directly into the
            // destination list's spare capacity, after flushing staged bytes
            // so ordering is preserved.
            try this.flush();
            const list = &this.context.list;
            try list.ensureUnusedCapacity(this.context.allocator, worst_case);
            const spare = list.allocatedSlice()[list.items.len..];
            const decoded = strings.copyUTF16IntoUTF8(
                spare[0..worst_case],
                []const u16,
                bytes,
                true,
            );
            list.items.len += @as(usize, decoded.written);
            return bytes.len;
        }

        if (worst_case + this.pos > max) {
            try this.flush();
        }

        // worst_case < max and, after the flush above, worst_case fits in remain().
        const decoded = strings.copyUTF16IntoUTF8(
            this.remain()[0..worst_case],
            []const u16,
            bytes,
            true,
        );
        this.pos += @as(usize, decoded.written);

        return bytes.len;
    }

    /// Writes `str` as an HTML attribute value, picking the UTF-8 or UTF-16
    /// path based on `str.isUTF8()`.
    pub fn writeHTMLAttributeValueString(this: *BufferedWriter, str: *E.String) Allocator.Error!void {
        if (str.isUTF8()) {
            try this.writeHTMLAttributeValue(str.slice(this.context.allocator));
            return;
        }

        try this.writeHTMLAttributeValue16(str.slice16());
    }

    /// Writes `bytes`, replacing `"`, `<` and `>` with their HTML entities.
    pub fn writeHTMLAttributeValue(this: *BufferedWriter, bytes: []const u8) Allocator.Error!void {
        var items = bytes;
        while (items.len > 0) {
            // TODO: SIMD
            if (strings.indexOfAny(items, "\"<>")) |j| {
                _ = try this.writeAll(items[0..j]);
                _ = switch (items[j]) {
                    '"' => try this.writeAll("&quot;"),
                    '<' => try this.writeAll("&lt;"),
                    '>' => try this.writeAll("&gt;"),
                    else => unreachable,
                };

                items = items[j + 1 ..];
                continue;
            }

            _ = try this.writeAll(items);
            break;
        }
    }

    /// UTF-16 variant of `writeHTMLAttributeValue`.
    pub fn writeHTMLAttributeValue16(this: *BufferedWriter, bytes: []const u16) Allocator.Error!void {
        var items = bytes;
        while (items.len > 0) {
            if (strings.indexOfAny16(items, "\"<>")) |j| {
                // this won't handle strings larger than 4 GB
                // that's fine though, 4 GB of SSR'd HTML is quite a lot...
                _ = try this.writeAll16(items[0..j]);
                _ = switch (items[j]) {
                    '"' => try this.writeAll("&quot;"),
                    '<' => try this.writeAll("&lt;"),
                    '>' => try this.writeAll("&gt;"),
                    else => unreachable,
                };

                items = items[j + 1 ..];
                continue;
            }

            _ = try this.writeAll16(items);
            break;
        }
    }

    /// Returns a `std.io.Writer` adapter over this buffered writer.
    pub fn writer(this: *BufferedWriter) BufferedWriter.Writer {
        return BufferedWriter.Writer{ .context = this };
    }
};
|
||||
|
||||
/// Appends `bytes` and returns `bytes.len`. This is the write function used by `Writer`.
pub fn writeAll(self: *MutableString, bytes: string) Allocator.Error!usize {
    const appended = bytes.len;
    try self.list.appendSlice(self.allocator, bytes);
    return appended;
}
|
||||
58
src/string/PathString.zig
Normal file
58
src/string/PathString.zig
Normal file
@@ -0,0 +1,58 @@
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
|
||||
// macOS sets file path limit to 1024
|
||||
// Since a pointer on x64 is 64 bits and only 46 bits are used
|
||||
// We can safely store the entire path slice in a single u64.
|
||||
/// A borrowed path slice packed into integer fields: the pointer's address in
/// `ptr` and the byte length in `len`.
///
/// When the target's `usize` leaves at least 53 bits after the length field,
/// the pointer is stored in 53 bits and the struct is 64 bits wide; otherwise
/// both fields are `usize` and the struct is 128 bits wide. A compile-time
/// check below enforces those sizes (skipped on wasm).
pub const PathString = packed struct {
    const PathIntLen = std.math.IntFittingRange(0, bun.MAX_PATH_BYTES);
    pub const use_small_path_string = @bitSizeOf(usize) - @bitSizeOf(PathIntLen) >= 53;
    pub const PathInt = if (use_small_path_string) PathIntLen else usize;
    pub const PointerIntType = if (use_small_path_string) u53 else usize;
    ptr: PointerIntType = 0,
    len: PathInt = 0,

    const JSC = bun.JSC;

    /// Reports the stored length in bytes.
    pub fn estimatedSize(this: *const PathString) usize {
        const byte_len: usize = this.len;
        return byte_len;
    }

    /// Rebuilds the byte slice from the stored address and length.
    pub inline fn slice(this: anytype) []const u8 {
        @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0.
        const address: usize = @intCast(this.ptr);
        const base: [*]u8 = @ptrFromInt(address);
        return base[0..this.len];
    }

    /// Like `slice`, but typed as NUL-terminated; the caller asserts that a
    /// `0` byte follows the stored bytes.
    pub inline fn sliceAssumeZ(this: anytype) [:0]const u8 {
        @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0.
        const address: usize = @intCast(this.ptr);
        const base: [*:0]u8 = @ptrFromInt(address);
        return base[0..this.len :0];
    }

    /// Packs `str` by truncating its address and length to the field widths.
    /// The bytes are not copied.
    pub inline fn init(str: []const u8) @This() {
        @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0.

        const packed_ptr: PointerIntType = @truncate(@intFromPtr(str.ptr));
        const packed_len: PathInt = @truncate(str.len);
        return .{
            .ptr = packed_ptr,
            .len = packed_len,
        };
    }

    /// True when the stored length is zero.
    pub inline fn isEmpty(this: anytype) bool {
        return this.len == 0;
    }

    /// `std.fmt` hook: writes the path bytes verbatim.
    pub fn format(self: PathString, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        const bytes = self.slice();
        try writer.writeAll(bytes);
    }

    pub const empty = @This(){ .ptr = 0, .len = 0 };
    comptime {
        if (!bun.Environment.isWasm) {
            const bits = @bitSizeOf(@This());
            if (use_small_path_string) {
                if (bits != 64) @compileError("PathString must be 64 bits");
            } else {
                if (bits != 128) @compileError("PathString must be 128 bits");
            }
        }
    }
};
|
||||
208
src/string/SmolStr.zig
Normal file
208
src/string/SmolStr.zig
Normal file
@@ -0,0 +1,208 @@
|
||||
const std = @import("std");
|
||||
const BabyList = @import("../baby_list.zig").BabyList;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// A 16-byte string that keeps up to 15 bytes inline in its own storage and
/// switches to a heap-backed `BabyList(u8)` once it grows beyond that.
///
/// The top bit of `__ptr` is the tag: set means "inlined", clear means "heap".
/// In the inlined form the same 128 bits are reinterpreted as `Inlined`.
pub const SmolStr = packed struct {
    __len: u32,
    cap: u32,
    __ptr: [*]u8,

    const Tag: usize = 0x8000000000000000;
    const NegatedTag: usize = ~Tag;

    /// JSON hook: writes the string contents through `writer.write`.
    pub fn jsonStringify(self: *const SmolStr, writer: anytype) !void {
        const bytes = self.slice();
        try writer.write(bytes);
    }

    /// Bit layout of the inlined form: 15 data bytes, a 7-bit length, and the tag bit.
    pub const Inlined = packed struct {
        data: u120,
        __len: u7,
        _tag: u1,

        /// Inline length widened to `u8`.
        pub fn len(this: Inlined) u8 {
            return @as(u8, this.__len);
        }

        pub fn setLen(this: *Inlined, new_len: u7) void {
            this.__len = new_len;
        }

        /// The inline bytes currently in use.
        pub fn slice(this: *Inlined) []const u8 {
            const chars = this.allChars();
            return chars[0..this.__len];
        }

        /// All 15 inline byte slots, viewed through the struct's own memory.
        pub fn allChars(this: *Inlined) *[15]u8 {
            const as_int: *u128 = @ptrCast(this);
            const bytes: [*]u8 = @ptrCast(as_int);
            return bytes[0..15];
        }
    };

    comptime {
        assert(@sizeOf(SmolStr) == @sizeOf(Inlined));
    }

    /// An empty, inlined string.
    pub fn empty() SmolStr {
        return SmolStr.fromInlined(.{
            .data = 0,
            .__len = 0,
            ._tag = 1,
        });
    }

    /// Length in bytes. For the inlined form it is read from the 7 bits just
    /// below the tag bit; for the heap form it is `__len`.
    pub fn len(this: *const SmolStr) u32 {
        if (!this.isInlined()) return this.__len;

        const bits = @intFromPtr(this.__ptr);
        return @intCast((bits >> 56) & 0b01111111);
    }

    /// `__ptr` with the tag bit cleared.
    pub fn ptr(this: *SmolStr) [*]u8 {
        const address: usize = @intFromPtr(this.__ptr);
        return @ptrFromInt(address & NegatedTag);
    }

    /// Const variant of `ptr`.
    pub fn ptrConst(this: *const SmolStr) [*]const u8 {
        const address: usize = @intFromPtr(this.__ptr);
        return @ptrFromInt(address & NegatedTag);
    }

    /// Sets the tag bit, marking the value as inlined.
    pub fn markInlined(this: *SmolStr) void {
        const address: usize = @intFromPtr(this.__ptr);
        this.__ptr = @ptrFromInt(address | Tag);
    }

    /// Clears the tag bit, marking the value as heap-backed.
    pub fn markHeap(this: *SmolStr) void {
        const address: usize = @intFromPtr(this.__ptr);
        this.__ptr = @ptrFromInt(address & NegatedTag);
    }

    pub fn isInlined(this: *const SmolStr) bool {
        const address: usize = @intFromPtr(this.__ptr);
        return (address & Tag) != 0;
    }

    /// Reinterprets the bits as `Inlined` and forces the tag bit on.
    pub fn toInlined(this: *const SmolStr) Inlined {
        const raw: u128 = @bitCast(this.*);
        var inlined: Inlined = @bitCast(raw);
        inlined._tag = 1;
        return inlined;
    }

    /// Wraps an existing `BabyList(u8)` as a heap-backed string (no copy).
    pub fn fromBabyList(baby_list: BabyList(u8)) SmolStr {
        var result: SmolStr = .{
            .__len = baby_list.len,
            .cap = baby_list.cap,
            .__ptr = baby_list.ptr,
        };
        result.markHeap();
        return result;
    }

    /// Reinterprets an `Inlined` value as a `SmolStr` and sets the inline tag.
    pub fn fromInlined(inlined: Inlined) SmolStr {
        var result: SmolStr = @bitCast(inlined);
        result.markInlined();
        return result;
    }

    /// A one-byte inlined string.
    pub fn fromChar(char: u8) SmolStr {
        var inlined = Inlined{
            .data = 0,
            .__len = 1,
            ._tag = 1,
        };
        inlined.allChars()[0] = char;

        return SmolStr.fromInlined(inlined);
    }

    /// Copies `values` into a new string: inline when it is at most 15 bytes,
    /// otherwise into a freshly allocated `BabyList(u8)`.
    pub fn fromSlice(allocator: Allocator, values: []const u8) Allocator.Error!SmolStr {
        if (values.len > 15) {
            var heap_list = try BabyList(u8).initCapacity(allocator, values.len);
            heap_list.appendSliceAssumeCapacity(values);
            return SmolStr.fromBabyList(heap_list);
        }

        var inlined = Inlined{
            .data = 0,
            .__len = 0,
            ._tag = 1,
        };

        if (values.len > 0) {
            @memcpy(inlined.allChars()[0..values.len], values[0..values.len]);
            inlined.setLen(@intCast(values.len));
        }

        return SmolStr.fromInlined(inlined);
    }

    /// The string's bytes: read from this struct's own memory when inlined,
    /// otherwise from the heap pointer.
    pub fn slice(this: *const SmolStr) []const u8 {
        if (!this.isInlined()) {
            return this.ptrConst()[0..this.__len];
        }

        const own_bytes: [*]const u8 = @ptrCast(this);
        return own_bytes[0..this.len()];
    }

    /// Appends one byte, moving from inline to heap storage when the result
    /// would exceed 15 bytes.
    pub fn appendChar(this: *SmolStr, allocator: Allocator, char: u8) Allocator.Error!void {
        if (this.isInlined()) {
            var inlined = this.toInlined();
            const current = inlined.len();

            if (current + 1 > 15) {
                var heap_list = try BabyList(u8).initCapacity(allocator, current + 1);
                heap_list.appendSliceAssumeCapacity(inlined.slice());
                try heap_list.push(allocator, char);
                // this.* = SmolStr.fromBabyList(baby_list);
                this.__len = heap_list.len;
                this.__ptr = heap_list.ptr;
                this.cap = heap_list.cap;
                this.markHeap();
                return;
            }

            inlined.allChars()[current] = char;
            inlined.setLen(@intCast(current + 1));
            // this.* = SmolStr.fromInlined(inlined);
            this.* = @bitCast(inlined);
            this.markInlined();
            return;
        }

        var heap_list = BabyList(u8){
            .ptr = this.ptr(),
            .len = this.__len,
            .cap = this.cap,
        };
        try heap_list.push(allocator, char);

        // this.* = SmolStr.fromBabyList(baby_list);
        this.__len = heap_list.len;
        this.__ptr = heap_list.ptr;
        this.cap = heap_list.cap;
    }

    /// Appends `values`, moving from inline to heap storage when the result
    /// would exceed 15 bytes.
    pub fn appendSlice(this: *SmolStr, allocator: Allocator, values: []const u8) Allocator.Error!void {
        if (this.isInlined()) {
            var inlined = this.toInlined();
            const current = inlined.len();

            if (current + values.len > 15) {
                var heap_list = try BabyList(u8).initCapacity(allocator, current + values.len);
                heap_list.appendSliceAssumeCapacity(inlined.slice());
                heap_list.appendSliceAssumeCapacity(values);
                this.* = SmolStr.fromBabyList(heap_list);
                return;
            }

            @memcpy(inlined.allChars()[current .. current + values.len], values);
            inlined.setLen(@intCast(current + values.len));
            this.* = SmolStr.fromInlined(inlined);
            return;
        }

        var heap_list = BabyList(u8){
            .ptr = this.ptr(),
            .len = this.__len,
            .cap = this.cap,
        };
        try heap_list.append(allocator, values);

        this.* = SmolStr.fromBabyList(heap_list);
    }
};
|
||||
244
src/string/StringBuilder.zig
Normal file
244
src/string/StringBuilder.zig
Normal file
@@ -0,0 +1,244 @@
|
||||
const StringBuilder = @This();
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const Environment = bun.Environment;
|
||||
const assert = bun.assert;
|
||||
|
||||
const DebugHashTable = if (Environment.allow_assert) std.AutoHashMapUnmanaged(u64, void) else void;
|
||||
|
||||
len: usize = 0,
|
||||
cap: usize = 0,
|
||||
ptr: ?[*]u8 = null,
|
||||
|
||||
/// Creates a builder whose buffer of `cap` bytes is allocated immediately.
pub fn initCapacity(
    allocator: std.mem.Allocator,
    cap: usize,
) Allocator.Error!StringBuilder {
    const backing = try allocator.alloc(u8, cap);
    return .{
        .cap = cap,
        .len = 0,
        .ptr = backing.ptr,
    };
}
|
||||
|
||||
/// Adds `slice.len` plus one extra byte to the planned capacity.
pub fn countZ(this: *StringBuilder, slice: []const u8) void {
    const with_extra_byte = slice.len + 1;
    this.cap += with_extra_byte;
}
|
||||
|
||||
/// Adds `slice.len` to the planned capacity.
pub fn count(this: *StringBuilder, slice: []const u8) void {
    const needed = slice.len;
    this.cap += needed;
}
|
||||
|
||||
/// Allocates a buffer of `cap` bytes (as accumulated by the `count*` calls)
/// and resets the write position to 0.
pub fn allocate(this: *StringBuilder, allocator: Allocator) Allocator.Error!void {
    const backing = try allocator.alloc(u8, this.cap);
    this.len = 0;
    this.ptr = backing.ptr;
}
|
||||
|
||||
/// Frees the buffer. Does nothing when no buffer is set or `cap` is 0.
pub fn deinit(this: *StringBuilder, allocator: Allocator) void {
    const base = this.ptr orelse return;
    if (this.cap == 0) return;
    allocator.free(base[0..this.cap]);
}
|
||||
|
||||
/// Adds the UTF-8 length of the UTF-16LE `slice`, as computed by simdutf, to
/// the planned capacity.
pub fn count16(this: *StringBuilder, slice: []const u16) void {
    const utf8_len = bun.simdutf.length.utf8.from.utf16.le(slice);
    this.cap += utf8_len;
}
|
||||
|
||||
/// Adds the UTF-8 length of the UTF-16 `slice` (per
/// `elementLengthUTF16IntoUTF8`) plus one extra byte to the planned capacity.
pub fn count16Z(this: *StringBuilder, slice: [:0]const u16) void {
    const utf8_len = bun.strings.elementLengthUTF16IntoUTF8([:0]const u16, slice);
    this.cap += utf8_len + 1;
}
|
||||
|
||||
/// Converts the UTF-16LE `slice` to UTF-8, writes it plus a NUL terminator
/// into the builder's writable region, and returns the written bytes.
///
/// If simdutf reports an error for the input, the conversion is redone through
/// `bun.strings.toUTF8ListWithTypeBun` into memory from `fallback_allocator`
/// instead; that result does not live in the builder. Returns null when the
/// fallback path fails.
pub fn append16(this: *StringBuilder, slice: []const u16, fallback_allocator: std.mem.Allocator) ?[:0]u8 {
    const buf = this.writable();
    if (slice.len == 0) {
        buf[0] = 0;
        this.len += 1;
        return buf[0..0 :0];
    }

    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(slice, buf);
    if (result.status == .success) {
        this.len += result.count + 1;
        buf[result.count] = 0;
        return buf[0..result.count :0];
    }

    var list = std.ArrayList(u8).init(fallback_allocator);
    var out = bun.strings.toUTF8ListWithTypeBun(&list, []const u16, slice, false) catch {
        // Release whatever was allocated before the failure.
        list.deinit();
        return null;
    };
    out.append(0) catch {
        out.deinit();
        return null;
    };

    // The terminator was appended to `out`, so the result must be sliced from
    // `out` as well; `list` does not see that append.
    return out.items[0 .. out.items.len - 1 :0];
}
|
||||
|
||||
/// Copies `slice` into the buffer followed by a NUL byte and returns the copy
/// as a NUL-terminated slice. Advances the write position by `slice.len + 1`.
/// Requires `allocate` to have been called with enough counted capacity.
pub fn appendZ(this: *StringBuilder, slice: []const u8) [:0]const u8 {
    if (comptime Environment.allow_assert) {
        assert(this.len + 1 <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const base = this.ptr.?;
    const start = this.len;

    bun.copy(u8, base[start..this.cap], slice);
    base[start + slice.len] = 0;
    const written = base[start..this.cap][0..slice.len :0];
    this.len = start + slice.len + 1;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return written;
}
|
||||
|
||||
/// Converts `str` to UTF-8 (using `bun.default_allocator` for the temporary),
/// appends it, and returns the appended bytes. The temporary is released
/// before returning.
pub fn appendStr(this: *StringBuilder, str: bun.String) []const u8 {
    const utf8 = str.toUTF8(bun.default_allocator);
    defer utf8.deinit();

    const bytes = utf8.slice();
    return this.append(bytes);
}
|
||||
|
||||
/// Copies `slice` into the buffer at the current write position and returns
/// the copy. Advances the write position by `slice.len`.
/// Requires `allocate` to have been called with enough counted capacity.
pub fn append(this: *StringBuilder, slice: []const u8) []const u8 {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const start = this.len;
    const remaining = this.ptr.?[start..this.cap];

    bun.copy(u8, remaining, slice);
    const written = remaining[0..slice.len];
    this.len = start + slice.len;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return written;
}
|
||||
|
||||
/// Copies every slice in `slices` back-to-back into the buffer and returns a
/// `StringPointer` covering the combined bytes.
pub fn addConcat(this: *StringBuilder, slices: []const []const u8) bun.StringPointer {
    var destination = this.allocatedSlice()[this.len..];
    var total: usize = 0;

    for (slices) |part| {
        @memcpy(destination[0..part.len], part);
        destination = destination[part.len..];
        total += part.len;
    }

    return this.add(total);
}
|
||||
|
||||
/// Advances the write position by `len` bytes without copying anything and
/// returns a `StringPointer` for the skipped region. Offset and length are
/// truncated to `u32`.
pub fn add(this: *StringBuilder, len: usize) bun.StringPointer {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const offset = this.len;
    this.len = offset + len;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return .{
        .offset = @as(u32, @truncate(offset)),
        .length = @as(u32, @truncate(len)),
    };
}
|
||||
/// Copies `slice` into the buffer and returns a `StringPointer` (offset and
/// length, truncated to `u32`) locating the copy inside the buffer.
pub fn appendCount(this: *StringBuilder, slice: []const u8) bun.StringPointer {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const offset = this.len;
    const remaining = this.ptr.?[offset..this.cap];

    bun.copy(u8, remaining, slice);
    _ = remaining[0..slice.len];
    this.len = offset + slice.len;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return .{
        .offset = @as(u32, @truncate(offset)),
        .length = @as(u32, @truncate(slice.len)),
    };
}
|
||||
|
||||
/// Copies `slice` plus a trailing NUL byte into the buffer and returns a
/// `StringPointer` for the copy. The returned length excludes the NUL; the
/// write position advances past it.
pub fn appendCountZ(this: *StringBuilder, slice: []const u8) bun.StringPointer {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const base = this.ptr.?;
    const offset = this.len;
    const remaining = base[offset..this.cap];

    bun.copy(u8, remaining, slice);
    base[offset + slice.len] = 0;
    _ = remaining[0..slice.len];
    this.len = offset + slice.len + 1;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return .{
        .offset = @as(u32, @truncate(offset)),
        .length = @as(u32, @truncate(slice.len)),
    };
}
|
||||
|
||||
/// Formats `args` according to `str` directly into the unused tail of the
/// buffer and advances `len` by the number of bytes written.
///
/// The returned slice aliases the builder's storage. A buffer that is too
/// small for the formatted output reaches `unreachable`.
pub fn fmt(this: *StringBuilder, comptime str: []const u8, args: anytype) []const u8 {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const free_space = this.ptr.?[this.len..this.cap];
    const written = std.fmt.bufPrint(free_space, str, args) catch unreachable;
    this.len += written.len;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return written;
}
|
||||
|
||||
/// Formats `args` according to `str` into the unused tail of the buffer and
/// advances `len` by the number of bytes written.
///
/// Returns the offset at which the output starts and its length, both
/// truncated to `u32`. A buffer that is too small reaches `unreachable`.
pub fn fmtAppendCount(this: *StringBuilder, comptime str: []const u8, args: anytype) bun.StringPointer {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const offset = this.len;
    const written = std.fmt.bufPrint(this.ptr.?[offset..this.cap], str, args) catch unreachable;
    this.len = offset + written.len;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return bun.StringPointer{
        .offset = @as(u32, @truncate(offset)),
        .length = @as(u32, @truncate(written.len)),
    };
}
|
||||
|
||||
/// Like `fmtAppendCount`, but formats with `std.fmt.bufPrintZ` so the output
/// is followed by a 0 terminator, and advances `len` by the output length
/// plus one.
///
/// The returned `length` does not include the terminator.
pub fn fmtAppendCountZ(this: *StringBuilder, comptime str: []const u8, args: anytype) bun.StringPointer {
    if (comptime Environment.allow_assert) {
        assert(this.len <= this.cap); // didn't count everything
        assert(this.ptr != null); // must call allocate first
    }

    const offset = this.len;
    const written = std.fmt.bufPrintZ(this.ptr.?[offset..this.cap], str, args) catch unreachable;
    // One extra byte accounts for the terminator written by bufPrintZ.
    this.len = offset + written.len + 1;

    if (comptime Environment.allow_assert) assert(this.len <= this.cap);

    return bun.StringPointer{
        .offset = @as(u32, @truncate(offset)),
        .length = @as(u32, @truncate(written.len)),
    };
}
|
||||
|
||||
/// Grows `cap` by the number of bytes that formatting `args` with `str`
/// would produce. Nothing is written.
pub fn fmtCount(this: *StringBuilder, comptime str: []const u8, args: anytype) void {
    const needed = std.fmt.count(str, args);
    this.cap += needed;
}
|
||||
|
||||
/// Returns the whole backing buffer, `ptr[0..cap]`, or an empty slice when no
/// buffer has been set.
pub fn allocatedSlice(this: *StringBuilder) []u8 {
    if (this.ptr) |base| {
        if (comptime Environment.allow_assert) {
            assert(this.cap > 0);
        }
        return base[0..this.cap];
    }

    return &[_]u8{};
}
|
||||
|
||||
/// Returns the unused tail of the backing buffer, `ptr[len..cap]`, or an
/// empty slice when no buffer has been set.
pub fn writable(this: *StringBuilder) []u8 {
    if (this.ptr) |base| {
        if (comptime Environment.allow_assert) {
            assert(this.cap > 0);
        }
        return base[this.len..this.cap];
    }

    return &[_]u8{};
}
|
||||
164
src/string/StringJoiner.zig
Normal file
164
src/string/StringJoiner.zig
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Rope-like data structure for joining many small strings into one big string.
|
||||
//! Implemented as a linked list of potentially-owned slices and a length.
|
||||
const StringJoiner = @This();
|
||||
const std = @import("std");
|
||||
const default_allocator = bun.default_allocator;
|
||||
const bun = @import("root").bun;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const NullableAllocator = bun.NullableAllocator;
|
||||
const assert = bun.assert;
|
||||
|
||||
/// Temporary allocator used for nodes and duplicated strings.
|
||||
/// It is recommended to use a stack-fallback allocator for this.
|
||||
allocator: Allocator,
|
||||
|
||||
/// Total length of all nodes
|
||||
len: usize = 0,
|
||||
|
||||
head: ?*Node = null,
|
||||
tail: ?*Node = null,
|
||||
|
||||
/// Avoid an extra pass over the list when joining
|
||||
watcher: Watcher = .{},
|
||||
|
||||
/// One element of the joiner's singly linked list.
const Node = struct {
    /// Allocator handed to `init` for `slice`; used by `deinit` to free it.
    /// NOTE(review): presumably a no-op when no allocator was given; confirm
    /// against `NullableAllocator.free`.
    allocator: NullableAllocator = .{},
    /// The bytes this node contributes to the joined output.
    slice: []const u8 = "",
    /// The following node, or null for the tail.
    next: ?*Node = null,

    /// Creates a node with `joiner_alloc` that refers to `slice`.
    /// `slice_alloc` is stored so `deinit` can later free `slice` with it.
    pub fn init(joiner_alloc: Allocator, slice: []const u8, slice_alloc: ?Allocator) *Node {
        const created = joiner_alloc.create(Node) catch bun.outOfMemory();
        created.* = .{
            .slice = slice,
            .allocator = NullableAllocator.init(slice_alloc),
        };
        return created;
    }

    /// Frees `slice` through the stored allocator, then destroys the node
    /// itself with `joiner_alloc`.
    pub fn deinit(node: *Node, joiner_alloc: Allocator) void {
        node.allocator.free(node.slice);
        joiner_alloc.destroy(node);
    }
};
|
||||
|
||||
/// Bookkeeping that `push` updates for every chunk it receives.
pub const Watcher = struct {
    /// Substring that `push` looks for in each pushed chunk; empty disables the search.
    input: []const u8 = "",
    /// Incremented by one for every pushed chunk that contains `input`.
    estimated_count: u32 = 0,
    /// True when the most recently pushed chunk did not end in '\n'.
    needs_newline: bool = false,
};
|
||||
|
||||
/// Appends `data` without copying it and without recording an allocator for it.
/// `data` is expected to live until `.done` is called.
pub fn pushStatic(this: *StringJoiner, data: []const u8) void {
    return this.push(data, null);
}
|
||||
|
||||
/// Appends a copy of `data`. The copy is made with the joiner's allocator,
/// which is also recorded on the node so the copy can be freed with it.
/// Empty `data` is ignored.
pub fn pushCloned(this: *StringJoiner, data: []const u8) void {
    if (data.len == 0) return;

    const copy = this.allocator.dupe(u8, data) catch bun.outOfMemory();
    this.push(copy, this.allocator);
}
|
||||
|
||||
/// Appends `data` as a new tail node and updates `len` and the watcher.
///
/// `allocator` is stored on the node and later used to free `data`; pass null
/// for data the joiner should not free. Empty `data` is ignored.
pub fn push(this: *StringJoiner, data: []const u8, allocator: ?Allocator) void {
    if (data.len == 0) return;
    this.len += data.len;

    const node = Node.init(this.allocator, data, allocator);

    // `data` is known to be non-empty from here on.
    const needle = this.watcher.input;
    if (needle.len > 0 and bun.strings.contains(data, needle)) {
        this.watcher.estimated_count += 1;
    }
    this.watcher.needs_newline = data[data.len - 1] != '\n';

    if (this.tail) |old_tail| {
        old_tail.next = node;
    } else {
        assert(this.head == null);
        this.head = node;
    }
    this.tail = node;
}
|
||||
|
||||
/// Concatenates every node into one buffer allocated with `allocator` and
/// returns it; the caller owns the result.
///
/// On success every node is deinitialized as it is copied. If the allocation
/// fails the error is returned and the nodes are left untouched. An empty
/// joiner yields an empty slice without allocating.
pub fn done(this: *StringJoiner, allocator: Allocator) ![]u8 {
    if (this.head == null) {
        assert(this.tail == null);
        assert(this.len == 0);
        return &.{};
    }

    const joined = try allocator.alloc(u8, this.len);

    var offset: usize = 0;
    var cursor = this.head;
    while (cursor) |node| {
        const chunk = node.slice;
        @memcpy(joined[offset..][0..chunk.len], chunk);
        offset += chunk.len;

        // Read the link before the node is destroyed.
        cursor = node.next;
        node.deinit(this.allocator);
    }

    bun.assert(offset == joined.len);

    return joined;
}
|
||||
|
||||
/// Same as `.done`, but the returned buffer additionally ends with a copy of `end`.
///
/// An empty joiner returns a duplicate of `end`, or an empty slice when `end`
/// is empty too.
pub fn doneWithEnd(this: *StringJoiner, allocator: Allocator, end: []const u8) ![]u8 {
    if (this.head == null) {
        assert(this.tail == null);
        assert(this.len == 0);

        if (end.len > 0) {
            return allocator.dupe(u8, end);
        }

        return &.{};
    }

    const joined = try allocator.alloc(u8, this.len + end.len);

    var offset: usize = 0;
    var cursor = this.head;
    while (cursor) |node| {
        const chunk = node.slice;
        @memcpy(joined[offset..][0..chunk.len], chunk);
        offset += chunk.len;

        // Read the link before the node is destroyed.
        cursor = node.next;
        node.deinit(this.allocator);
    }

    const tail_space = joined[offset..];
    bun.assert(tail_space.len == end.len);
    @memcpy(tail_space, end);

    return joined;
}
|
||||
|
||||
/// Returns the final byte of the tail node's slice, or 0 when the joiner has
/// no nodes.
pub fn lastByte(this: *const StringJoiner) u8 {
    const tail_node = this.tail orelse return 0;
    const bytes = tail_node.slice;
    assert(bytes.len > 0);
    return bytes[bytes.len - 1];
}
|
||||
|
||||
/// Pushes a single "\n" when the watcher reports that the last pushed chunk
/// did not end with one; otherwise does nothing.
pub fn ensureNewlineAtEnd(this: *StringJoiner) void {
    if (!this.watcher.needs_newline) return;

    this.watcher.needs_newline = false;
    this.pushStatic("\n");
}
|
||||
|
||||
/// Reports whether any single node's slice contains `slice`.
/// Each node is searched on its own, so a match spanning two nodes is not found.
pub fn contains(this: *const StringJoiner, slice: []const u8) bool {
    var cursor = this.head;
    while (cursor) |node| : (cursor = node.next) {
        if (bun.strings.contains(node.slice, slice)) return true;
    }

    return false;
}
|
||||
268
src/string/WTFStringImpl.zig
Normal file
268
src/string/WTFStringImpl.zig
Normal file
@@ -0,0 +1,268 @@
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
const JSC = bun.JSC;
|
||||
const OOM = bun.OOM;
|
||||
|
||||
pub const WTFStringImpl = *WTFStringImplStruct;
|
||||
const ZigString = bun.JSC.ZigString;
|
||||
|
||||
/// Zig-side view of a WebKit `WTF::StringImpl`.
///
/// The fields and the `s_*` constants mirror the C++ class; see the note
/// below about keeping them in sync with WebKit.
pub const WTFStringImplStruct = extern struct {
    m_refCount: u32 = 0,
    m_length: u32 = 0,
    m_ptr: extern union { latin1: [*]const u8, utf16: [*]const u16 },
    m_hashAndFlags: u32 = 0,

    // ---------------------------------------------------------------------
    // These details must stay in sync with WTFStringImpl.h in WebKit!
    // ---------------------------------------------------------------------
    const s_flagCount: u32 = 8;

    const s_flagMask: u32 = (1 << s_flagCount) - 1;
    const s_flagStringKindCount: u32 = 4;
    const s_hashZeroValue: u32 = 0;
    // `@as` takes the type first and the value second.
    const s_hashFlagStringKindIsAtom: u32 = @as(u32, 1) << (s_flagStringKindCount);
    const s_hashFlagStringKindIsSymbol: u32 = @as(u32, 1) << (s_flagStringKindCount + 1);
    const s_hashMaskStringKind: u32 = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol;
    const s_hashFlagDidReportCost: u32 = @as(u32, 1) << 3;
    const s_hashFlag8BitBuffer: u32 = 1 << 2;
    const s_hashMaskBufferOwnership: u32 = (1 << 0) | (1 << 1);

    /// The bottom bit in the ref count indicates a static (immortal) string.
    const s_refCountFlagIsStaticString = 0x1;

    /// This allows us to ref / deref without disturbing the static string flag.
    const s_refCountIncrement = 0x2;

    // ---------------------------------------------------------------------

    /// Returns the reference count with the static-string flag bit stripped.
    pub fn refCount(this: WTFStringImpl) u32 {
        return this.m_refCount / s_refCountIncrement;
    }

    /// Returns the size in bytes of the character buffer (same as `byteLength`).
    pub fn memoryCost(this: WTFStringImpl) usize {
        return this.byteLength();
    }

    /// Returns true when the static (immortal) flag, the bottom bit of
    /// `m_refCount`, is set.
    pub fn isStatic(this: WTFStringImpl) bool {
        return this.m_refCount & s_refCountFlagIsStaticString != 0;
    }

    /// Returns the size in bytes of the character buffer: `m_length` for
    /// 8-bit strings, `m_length * 2` for 16-bit strings.
    pub fn byteLength(this: WTFStringImpl) usize {
        return if (this.is8Bit()) this.m_length else this.m_length * 2;
    }

    extern fn WTFStringImpl__isThreadSafe(WTFStringImpl) bool;
    /// Forwards to the native `WTFStringImpl__isThreadSafe`.
    pub fn isThreadSafe(this: WTFStringImpl) bool {
        return WTFStringImpl__isThreadSafe(this);
    }

    /// Returns the character buffer as raw bytes, whatever its encoding.
    pub fn byteSlice(this: WTFStringImpl) []const u8 {
        return this.m_ptr.latin1[0..this.byteLength()];
    }

    /// Returns true when the 8-bit buffer flag is set in `m_hashAndFlags`.
    pub inline fn is8Bit(self: WTFStringImpl) bool {
        return (self.m_hashAndFlags & s_hashFlag8BitBuffer) != 0;
    }

    /// Returns the length in code units (`m_length`).
    pub inline fn length(self: WTFStringImpl) u32 {
        return self.m_length;
    }

    /// Returns the buffer as UTF-16 code units. Asserts the string is 16-bit.
    pub inline fn utf16Slice(self: WTFStringImpl) []const u16 {
        bun.assert(!is8Bit(self));
        return self.m_ptr.utf16[0..length(self)];
    }

    /// Returns the buffer as Latin-1 bytes. Asserts the string is 8-bit.
    pub inline fn latin1Slice(self: WTFStringImpl) []const u8 {
        bun.assert(is8Bit(self));
        return self.m_ptr.latin1[0..length(self)];
    }

    /// Caller must ensure that the string is 8-bit and ASCII.
    pub inline fn utf8Slice(self: WTFStringImpl) []const u8 {
        if (comptime bun.Environment.allow_assert)
            bun.assert(canUseAsUTF8(self));
        return self.m_ptr.latin1[0..length(self)];
    }

    /// Wraps the buffer in a `ZigString` without copying, picking the
    /// constructor that matches the string's width.
    pub fn toZigString(this: WTFStringImpl) ZigString {
        if (this.is8Bit()) {
            return ZigString.init(this.latin1Slice());
        } else {
            return ZigString.initUTF16(this.utf16Slice());
        }
    }

    /// Drops one reference through the native `Bun__WTFStringImpl__deref`.
    pub inline fn deref(self: WTFStringImpl) void {
        JSC.markBinding(@src());
        const current_count = self.refCount();
        bun.assert(current_count > 0);
        Bun__WTFStringImpl__deref(self);
        if (comptime bun.Environment.allow_assert) {
            // Only inspect the count when this was not the last reference.
            if (current_count > 1) {
                bun.assert(self.refCount() < current_count or self.isStatic());
            }
        }
    }

    /// Adds one reference through the native `Bun__WTFStringImpl__ref`.
    pub inline fn ref(self: WTFStringImpl) void {
        JSC.markBinding(@src());
        const current_count = self.refCount();
        bun.assert(current_count > 0);
        Bun__WTFStringImpl__ref(self);
        bun.assert(self.refCount() > current_count or self.isStatic());
    }

    /// Takes a reference and returns the Latin-1 buffer as a
    /// `ZigString.Slice` tied to `refCountAllocator`.
    pub fn toLatin1Slice(this: WTFStringImpl) ZigString.Slice {
        this.ref();
        return ZigString.Slice.init(this.refCountAllocator(), this.latin1Slice());
    }

    extern fn Bun__WTFStringImpl__ensureHash(this: WTFStringImpl) void;
    /// Compute the hash() if necessary
    pub fn ensureHash(this: WTFStringImpl) void {
        JSC.markBinding(@src());
        Bun__WTFStringImpl__ensureHash(this);
    }

    /// Returns the string as UTF-8.
    ///
    /// 8-bit strings are transcoded with `toUTF8FromLatin1`; when that yields
    /// null, the existing buffer is returned with an added reference (see
    /// `toLatin1Slice`). 16-bit strings are always transcoded into memory
    /// from `allocator`.
    pub fn toUTF8(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice {
        if (this.is8Bit()) {
            if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| {
                return ZigString.Slice.init(allocator, utf8.items);
            }

            return this.toLatin1Slice();
        }

        return ZigString.Slice.init(
            allocator,
            bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(),
        );
    }

    pub const max = std.math.maxInt(u32);

    /// Same as `toUTF8`, except that when no transcoding is needed the
    /// existing buffer is returned via `fromUTF8NeverFree` and no reference
    /// is taken.
    pub fn toUTF8WithoutRef(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice {
        if (this.is8Bit()) {
            if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| {
                return ZigString.Slice.init(allocator, utf8.items);
            }

            return ZigString.Slice.fromUTF8NeverFree(this.latin1Slice());
        }

        return ZigString.Slice.init(
            allocator,
            bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(),
        );
    }

    /// Returns a newly allocated, 0-terminated UTF-8 copy of the string.
    /// Every path allocates from `allocator`.
    pub fn toOwnedSliceZ(this: WTFStringImpl, allocator: std.mem.Allocator) [:0]u8 {
        if (this.is8Bit()) {
            if (bun.strings.toUTF8FromLatin1Z(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| {
                // Last item is the terminator; exclude it from the slice length.
                return utf8.items[0 .. utf8.items.len - 1 :0];
            }

            return allocator.dupeZ(u8, this.latin1Slice()) catch bun.outOfMemory();
        }
        return bun.strings.toUTF8AllocZ(allocator, this.utf16Slice()) catch bun.outOfMemory();
    }

    /// Returns a transcoded UTF-8 copy, or null when the string is 8-bit and
    /// `toUTF8FromLatin1` reports that no conversion was produced.
    pub fn toUTF8IfNeeded(this: WTFStringImpl, allocator: std.mem.Allocator) ?ZigString.Slice {
        if (this.is8Bit()) {
            if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| {
                return ZigString.Slice.init(allocator, utf8.items);
            }

            return null;
        }

        return ZigString.Slice.init(
            allocator,
            bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(),
        );
    }

    /// Avoid using this in code paths that are about to get the string as a UTF-8
    /// In that case, use toUTF8IfNeeded instead.
    pub fn canUseAsUTF8(this: WTFStringImpl) bool {
        return this.is8Bit() and bun.strings.isAllASCII(this.latin1Slice());
    }

    /// Returns the number of bytes the string occupies when encoded as
    /// UTF-16: two bytes per code unit, both for Latin-1 strings (each byte
    /// widens to one code unit) and for strings already stored as UTF-16.
    pub fn utf16ByteLength(this: WTFStringImpl) usize {
        return @as(usize, this.length()) * 2;
    }

    /// Returns the number of bytes the string occupies when encoded as UTF-8.
    pub fn utf8ByteLength(this: WTFStringImpl) usize {
        if (this.is8Bit()) {
            const input = this.latin1Slice();
            return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU8(input.ptr, input.len, .utf8) else 0;
        } else {
            const input = this.utf16Slice();
            return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU16(input.ptr, input.len, .utf8) else 0;
        }
    }

    /// Returns the length in code units, used as the Latin-1 byte length.
    pub fn latin1ByteLength(this: WTFStringImpl) usize {
        // Not all UTF-16 characters are representable in latin1.
        // Those get truncated?
        return this.length();
    }

    /// Returns an `std.mem.Allocator` backed by `StringImplAllocator`, with
    /// this string as its context pointer.
    pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator {
        return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr };
    }

    /// Forwards to the native `Bun__WTFStringImpl__hasPrefix` with `text`.
    pub fn hasPrefix(self: WTFStringImpl, text: []const u8) bool {
        return Bun__WTFStringImpl__hasPrefix(self, text.ptr, text.len);
    }

    extern fn Bun__WTFStringImpl__deref(self: WTFStringImpl) void;
    extern fn Bun__WTFStringImpl__ref(self: WTFStringImpl) void;
    extern fn Bun__WTFStringImpl__hasPrefix(self: *const WTFStringImplStruct, offset: [*]const u8, length: usize) bool;
};
|
||||
|
||||
/// `std.mem.Allocator` vtable whose context pointer is a `WTFStringImpl`.
/// It never allocates memory: `alloc` takes a reference on the string and
/// `free` releases one.
pub const StringImplAllocator = struct {
    /// Returns the string's own buffer after taking a reference, but only
    /// when `len` equals the string's byte length; otherwise returns null.
    fn alloc(ptr: *anyopaque, len: usize, _: u8, _: usize) ?[*]u8 {
        const impl = bun.cast(WTFStringImpl, ptr);

        // we don't actually allocate, we just reference count
        if (impl.byteLength() != len) return null;

        impl.ref();

        // we should never actually allocate
        return @constCast(impl.m_ptr.latin1);
    }

    /// Resizing is never supported.
    fn resize(_: *anyopaque, _: []u8, _: u8, _: usize, _: usize) bool {
        return false;
    }

    /// Asserts that `buf` is exactly the string's Latin-1 buffer, then drops
    /// one reference.
    pub fn free(
        ptr: *anyopaque,
        buf: []u8,
        _: u8,
        _: usize,
    ) void {
        const impl = bun.cast(WTFStringImpl, ptr);
        bun.assert(impl.latin1Slice().ptr == buf.ptr);
        bun.assert(impl.latin1Slice().len == buf.len);
        impl.deref();
    }

    pub const VTable = std.mem.Allocator.VTable{
        .alloc = &alloc,
        .resize = &resize,
        .free = &free,
    };

    pub const VTablePtr = &VTable;
};
|
||||
Reference in New Issue
Block a user