diff --git a/src/baby_list.zig b/src/baby_list.zig index da4148b549..0d8be0abf6 100644 --- a/src/baby_list.zig +++ b/src/baby_list.zig @@ -174,7 +174,7 @@ pub fn BabyList(comptime Type: type) type { bun.assert(this.cap >= this.len); } - pub fn initCapacity(allocator: std.mem.Allocator, len: usize) !ListType { + pub fn initCapacity(allocator: std.mem.Allocator, len: usize) std.mem.Allocator.Error!ListType { return initWithBuffer(try allocator.alloc(Type, len)); } diff --git a/src/bun.js/node/path.zig b/src/bun.js/node/path.zig index cabfc69be5..7e41636641 100644 --- a/src/bun.js/node/path.zig +++ b/src/bun.js/node/path.zig @@ -72,7 +72,7 @@ const CHAR_STR_BACKWARD_SLASH = "\\"; const CHAR_STR_FORWARD_SLASH = "/"; const CHAR_STR_DOT = "."; -const StringBuilder = @import("../../string_builder.zig"); +const StringBuilder = bun.StringBuilder; const toJSString = JSC.JSValue.toJSString; diff --git a/src/bun.zig b/src/bun.zig index d2bc5963dd..33f5240bca 100644 --- a/src/bun.zig +++ b/src/bun.zig @@ -315,10 +315,6 @@ pub const StringTypes = @import("string_types.zig"); pub const stringZ = StringTypes.stringZ; pub const string = StringTypes.string; pub const CodePoint = StringTypes.CodePoint; -pub const PathString = StringTypes.PathString; -pub const HashedString = StringTypes.HashedString; -pub const strings = @import("string_immutable.zig"); -pub const MutableString = @import("string_mutable.zig").MutableString; pub const RefCount = @import("./ref_count.zig").RefCount; pub const MAX_PATH_BYTES: usize = if (Environment.isWasm) 1024 else std.fs.max_path_bytes; @@ -548,7 +544,7 @@ pub fn clone(item: anytype, allocator: std.mem.Allocator) !@TypeOf(item) { return try allocator.dupe(Child, item); } -pub const StringBuilder = @import("./string_builder.zig"); +pub const StringBuilder = @import("./string.zig").StringBuilder; pub const LinearFifo = @import("./linear_fifo.zig").LinearFifo; pub const linux = struct { @@ -1595,7 +1591,6 @@ pub const fast_debug_build_mode = 
fast_debug_build_cmd != .None and Environment.isDebug; pub const MultiArrayList = @import("./multi_array_list.zig").MultiArrayList; -pub const StringJoiner = @import("./StringJoiner.zig"); pub const NullableAllocator = @import("./allocators/NullableAllocator.zig"); pub const renamer = @import("./renamer.zig"); @@ -2089,12 +2084,18 @@ pub const zstd = @import("./deps/zstd.zig"); pub const StringPointer = Schema.Api.StringPointer; pub const StandaloneModuleGraph = @import("./StandaloneModuleGraph.zig").StandaloneModuleGraph; -pub const String = @import("./string.zig").String; -pub const SliceWithUnderlyingString = @import("./string.zig").SliceWithUnderlyingString; +const _string = @import("./string.zig"); +pub const strings = @import("string_immutable.zig"); +pub const String = _string.String; +pub const StringJoiner = _string.StringJoiner; +pub const SliceWithUnderlyingString = _string.SliceWithUnderlyingString; +pub const PathString = _string.PathString; +pub const HashedString = _string.HashedString; +pub const MutableString = _string.MutableString; pub const WTF = struct { /// The String type from WebKit's WTF library. 
- pub const StringImpl = @import("./string.zig").WTFStringImpl; + pub const StringImpl = _string.WTFStringImpl; }; pub const Wyhash11 = @import("./wyhash.zig").Wyhash11; diff --git a/src/http.zig b/src/http.zig index e028730b65..bffc2b3796 100644 --- a/src/http.zig +++ b/src/http.zig @@ -22,7 +22,7 @@ const Lock = bun.Mutex; const HTTPClient = @This(); const Zlib = @import("./zlib.zig"); const Brotli = bun.brotli; -const StringBuilder = @import("./string_builder.zig"); +const StringBuilder = bun.StringBuilder; const ThreadPool = bun.ThreadPool; const ObjectPool = @import("./pool.zig").ObjectPool; const posix = std.posix; diff --git a/src/http/header_builder.zig b/src/http/header_builder.zig index 247bcf1cad..965a15119f 100644 --- a/src/http/header_builder.zig +++ b/src/http/header_builder.zig @@ -1,5 +1,5 @@ const HeaderBuilder = @This(); -const StringBuilder = @import("../string_builder.zig"); +const StringBuilder = bun.StringBuilder; const Headers = bun.JSC.WebCore.Headers; const string = bun.string; const HTTPClient = @import("../http.zig"); diff --git a/src/install/install.zig b/src/install/install.zig index da7806f7eb..b7ab475375 100644 --- a/src/install/install.zig +++ b/src/install/install.zig @@ -148,7 +148,7 @@ const PatchTaskFifo = std.fifo.LinearFifo(*PatchTask, .{ .Static = 32 }); const Semver = bun.Semver; const ExternalString = Semver.ExternalString; const String = Semver.String; -const GlobalStringBuilder = @import("../string_builder.zig"); +const GlobalStringBuilder = bun.StringBuilder; const SlicedString = Semver.SlicedString; pub const Repository = @import("./repository.zig").Repository; pub const Bin = @import("./bin.zig").Bin; diff --git a/src/install/lockfile.zig b/src/install/lockfile.zig index 2b08466afb..01412d23ff 100644 --- a/src/install/lockfile.zig +++ b/src/install/lockfile.zig @@ -59,7 +59,7 @@ const ArrayIdentityContext = @import("../identity_context.zig").ArrayIdentityCon const Semver = bun.Semver; const ExternalString = 
Semver.ExternalString; const String = Semver.String; -const GlobalStringBuilder = @import("../string_builder.zig"); +const GlobalStringBuilder = bun.StringBuilder; const SlicedString = Semver.SlicedString; const Repository = @import("./repository.zig").Repository; const Bin = @import("./bin.zig").Bin; diff --git a/src/install/npm.zig b/src/install/npm.zig index de733109d6..c83707863f 100644 --- a/src/install/npm.zig +++ b/src/install/npm.zig @@ -1,7 +1,7 @@ const URL = @import("../url.zig").URL; const bun = @import("root").bun; const std = @import("std"); -const MutableString = @import("../string_mutable.zig").MutableString; +const MutableString = bun.MutableString; const Semver = bun.Semver; const ExternalString = Semver.ExternalString; const String = Semver.String; diff --git a/src/js_parser.zig b/src/js_parser.zig index b7a62f0864..526a172720 100644 --- a/src/js_parser.zig +++ b/src/js_parser.zig @@ -21,7 +21,7 @@ const Output = bun.Output; const Global = bun.Global; const Environment = bun.Environment; const strings = bun.strings; -const MutableString = @import("./string_mutable.zig").MutableString; +const MutableString = bun.MutableString; const stringZ = bun.stringZ; const default_allocator = bun.default_allocator; const C = bun.C; diff --git a/src/logger.zig b/src/logger.zig index 2045716991..ecb5ca5a94 100644 --- a/src/logger.zig +++ b/src/logger.zig @@ -18,7 +18,7 @@ const unicode = std.unicode; const Ref = @import("./ast/base.zig").Ref; const expect = std.testing.expect; const assert = bun.assert; -const StringBuilder = @import("./string_builder.zig"); +const StringBuilder = bun.StringBuilder; const Index = @import("./ast/base.zig").Index; const OOM = bun.OOM; const JSError = bun.JSError; diff --git a/src/shell/braces.zig b/src/shell/braces.zig index 3532be44f8..07ba07a384 100644 --- a/src/shell/braces.zig +++ b/src/shell/braces.zig @@ -4,7 +4,7 @@ const std = @import("std"); const builtin = @import("builtin"); const Arena = std.heap.ArenaAllocator; const 
Allocator = std.mem.Allocator; -const SmolStr = @import("../string_types.zig").SmolStr; +const SmolStr = @import("../string.zig").SmolStr; const TaggedPointerUnion = @import("../tagged_pointer.zig").TaggedPointerUnion; /// Using u16 because anymore tokens than that results in an unreasonably high diff --git a/src/string.zig b/src/string.zig index 6589bac10e..f86406d223 100644 --- a/src/string.zig +++ b/src/string.zig @@ -2,278 +2,16 @@ const std = @import("std"); const bun = @import("root").bun; const JSC = bun.JSC; const JSValue = bun.JSC.JSValue; -const Parent = @This(); const OOM = bun.OOM; -pub const BufferOwnership = enum(u32) { - BufferInternal, - BufferOwned, - BufferSubstring, - BufferExternal, -}; - -pub const WTFStringImpl = *WTFStringImplStruct; - -pub const WTFStringImplStruct = extern struct { - m_refCount: u32 = 0, - m_length: u32 = 0, - m_ptr: extern union { latin1: [*]const u8, utf16: [*]const u16 }, - m_hashAndFlags: u32 = 0, - - // --------------------------------------------------------------------- - // These details must stay in sync with WTFStringImpl.h in WebKit! - // --------------------------------------------------------------------- - const s_flagCount: u32 = 8; - - const s_flagMask: u32 = (1 << s_flagCount) - 1; - const s_flagStringKindCount: u32 = 4; - const s_hashZeroValue: u32 = 0; - const s_hashFlagStringKindIsAtom: u32 = @as(1, u32) << (s_flagStringKindCount); - const s_hashFlagStringKindIsSymbol: u32 = @as(1, u32) << (s_flagStringKindCount + 1); - const s_hashMaskStringKind: u32 = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol; - const s_hashFlagDidReportCost: u32 = @as(1, u32) << 3; - const s_hashFlag8BitBuffer: u32 = 1 << 2; - const s_hashMaskBufferOwnership: u32 = (1 << 0) | (1 << 1); - - /// The bottom bit in the ref count indicates a static (immortal) string. - const s_refCountFlagIsStaticString = 0x1; - - /// This allows us to ref / deref without disturbing the static string flag. 
- const s_refCountIncrement = 0x2; - - // --------------------------------------------------------------------- - - pub fn refCount(this: WTFStringImpl) u32 { - return this.m_refCount / s_refCountIncrement; - } - - pub fn memoryCost(this: WTFStringImpl) usize { - return this.byteLength(); - } - - pub fn isStatic(this: WTFStringImpl) bool { - return this.m_refCount & s_refCountIncrement != 0; - } - - pub fn byteLength(this: WTFStringImpl) usize { - return if (this.is8Bit()) this.m_length else this.m_length * 2; - } - - extern fn WTFStringImpl__isThreadSafe(WTFStringImpl) bool; - pub fn isThreadSafe(this: WTFStringImpl) bool { - return WTFStringImpl__isThreadSafe(this); - } - - pub fn byteSlice(this: WTFStringImpl) []const u8 { - return this.m_ptr.latin1[0..this.byteLength()]; - } - - pub inline fn is8Bit(self: WTFStringImpl) bool { - return (self.m_hashAndFlags & s_hashFlag8BitBuffer) != 0; - } - - pub inline fn length(self: WTFStringImpl) u32 { - return self.m_length; - } - - pub inline fn utf16Slice(self: WTFStringImpl) []const u16 { - bun.assert(!is8Bit(self)); - return self.m_ptr.utf16[0..length(self)]; - } - - pub inline fn latin1Slice(self: WTFStringImpl) []const u8 { - bun.assert(is8Bit(self)); - return self.m_ptr.latin1[0..length(self)]; - } - - /// Caller must ensure that the string is 8-bit and ASCII. 
- pub inline fn utf8Slice(self: WTFStringImpl) []const u8 { - if (comptime bun.Environment.allow_assert) - bun.assert(canUseAsUTF8(self)); - return self.m_ptr.latin1[0..length(self)]; - } - - pub fn toZigString(this: WTFStringImpl) ZigString { - if (this.is8Bit()) { - return ZigString.init(this.latin1Slice()); - } else { - return ZigString.initUTF16(this.utf16Slice()); - } - } - - pub inline fn deref(self: WTFStringImpl) void { - JSC.markBinding(@src()); - const current_count = self.refCount(); - bun.assert(current_count > 0); - Bun__WTFStringImpl__deref(self); - if (comptime bun.Environment.allow_assert) { - if (current_count > 1) { - bun.assert(self.refCount() < current_count or self.isStatic()); - } - } - } - - pub inline fn ref(self: WTFStringImpl) void { - JSC.markBinding(@src()); - const current_count = self.refCount(); - bun.assert(current_count > 0); - Bun__WTFStringImpl__ref(self); - bun.assert(self.refCount() > current_count or self.isStatic()); - } - - pub fn toLatin1Slice(this: WTFStringImpl) ZigString.Slice { - this.ref(); - return ZigString.Slice.init(this.refCountAllocator(), this.latin1Slice()); - } - - extern fn Bun__WTFStringImpl__ensureHash(this: WTFStringImpl) void; - /// Compute the hash() if necessary - pub fn ensureHash(this: WTFStringImpl) void { - JSC.markBinding(@src()); - Bun__WTFStringImpl__ensureHash(this); - } - - pub fn toUTF8(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice { - if (this.is8Bit()) { - if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { - return ZigString.Slice.init(allocator, utf8.items); - } - - return this.toLatin1Slice(); - } - - return ZigString.Slice.init( - allocator, - bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), - ); - } - - pub const max = std.math.maxInt(u32); - - pub fn toUTF8WithoutRef(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice { - if (this.is8Bit()) { - if 
(bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { - return ZigString.Slice.init(allocator, utf8.items); - } - - return ZigString.Slice.fromUTF8NeverFree(this.latin1Slice()); - } - - return ZigString.Slice.init( - allocator, - bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), - ); - } - - pub fn toOwnedSliceZ(this: WTFStringImpl, allocator: std.mem.Allocator) [:0]u8 { - if (this.is8Bit()) { - if (bun.strings.toUTF8FromLatin1Z(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { - return utf8.items[0 .. utf8.items.len - 1 :0]; - } - - return allocator.dupeZ(u8, this.latin1Slice()) catch bun.outOfMemory(); - } - return bun.strings.toUTF8AllocZ(allocator, this.utf16Slice()) catch bun.outOfMemory(); - } - - pub fn toUTF8IfNeeded(this: WTFStringImpl, allocator: std.mem.Allocator) ?ZigString.Slice { - if (this.is8Bit()) { - if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { - return ZigString.Slice.init(allocator, utf8.items); - } - - return null; - } - - return ZigString.Slice.init( - allocator, - bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), - ); - } - - /// Avoid using this in code paths that are about to get the string as a UTF-8 - /// In that case, use toUTF8IfNeeded instead. 
- pub fn canUseAsUTF8(this: WTFStringImpl) bool { - return this.is8Bit() and bun.strings.isAllASCII(this.latin1Slice()); - } - - pub fn utf16ByteLength(this: WTFStringImpl) usize { - if (this.is8Bit()) { - return this.length() * 2; - } else { - return this.length(); - } - } - - pub fn utf8ByteLength(this: WTFStringImpl) usize { - if (this.is8Bit()) { - const input = this.latin1Slice(); - return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU8(input.ptr, input.len, .utf8) else 0; - } else { - const input = this.utf16Slice(); - return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU16(input.ptr, input.len, .utf8) else 0; - } - } - - pub fn latin1ByteLength(this: WTFStringImpl) usize { - // Not all UTF-16 characters fit are representable in latin1. - // Those get truncated? - return this.length(); - } - - pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator { - return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr }; - } - - pub fn hasPrefix(self: WTFStringImpl, text: []const u8) bool { - return Bun__WTFStringImpl__hasPrefix(self, text.ptr, text.len); - } - - extern fn Bun__WTFStringImpl__deref(self: WTFStringImpl) void; - extern fn Bun__WTFStringImpl__ref(self: WTFStringImpl) void; - extern fn Bun__WTFStringImpl__hasPrefix(self: *const WTFStringImplStruct, offset: [*]const u8, length: usize) bool; -}; - -pub const StringImplAllocator = struct { - fn alloc(ptr: *anyopaque, len: usize, _: u8, _: usize) ?[*]u8 { - var this = bun.cast(WTFStringImpl, ptr); - const len_ = this.byteLength(); - - if (len_ != len) { - // we don't actually allocate, we just reference count - return null; - } - - this.ref(); - - // we should never actually allocate - return @constCast(this.m_ptr.latin1); - } - - fn resize(_: *anyopaque, _: []u8, _: u8, _: usize, _: usize) bool { - return false; - } - - pub fn free( - ptr: *anyopaque, - buf: []u8, - _: u8, - _: usize, - ) void { - var this = bun.cast(WTFStringImpl, ptr); - 
bun.assert(this.latin1Slice().ptr == buf.ptr); - bun.assert(this.latin1Slice().len == buf.len); - this.deref(); - } - - pub const VTable = std.mem.Allocator.VTable{ - .alloc = &alloc, - .resize = &resize, - .free = &free, - }; - - pub const VTablePtr = &VTable; -}; +pub const HashedString = @import("string/HashedString.zig"); +pub const MutableString = @import("string/MutableString.zig"); +pub const PathString = @import("string/PathString.zig").PathString; +pub const SmolStr = @import("string/SmolStr.zig").SmolStr; +pub const StringBuilder = @import("string/StringBuilder.zig"); +pub const StringJoiner = @import("string/StringJoiner.zig"); +pub const WTFStringImpl = @import("string/WTFStringImpl.zig").WTFStringImpl; +pub const WTFStringImplStruct = @import("string/WTFStringImpl.zig").WTFStringImplStruct; pub const Tag = enum(u8) { /// String is not valid. Observed on some failed operations. @@ -315,7 +53,7 @@ pub const String = extern struct { pub const empty = String{ .tag = .Empty, .value = .{ .Empty = {} } }; pub const dead = String{ .tag = .Dead, .value = .{ .Dead = {} } }; - pub const StringImplAllocator = Parent.StringImplAllocator; + pub const StringImplAllocator = @import("string/WTFStringImpl.zig").StringImplAllocator; extern fn BunString__fromLatin1(bytes: [*]const u8, len: usize) String; extern fn BunString__fromBytes(bytes: [*]const u8, len: usize) String; @@ -1263,7 +1001,7 @@ pub const String = extern struct { } pub fn isWTFAllocator(this: std.mem.Allocator) bool { - return this.vtable == @This().StringImplAllocator.VTablePtr; + return this.vtable == StringImplAllocator.VTablePtr; } pub fn eqlBytes(this: String, value: []const u8) bool { diff --git a/src/string/HashedString.zig b/src/string/HashedString.zig new file mode 100644 index 0000000000..f14731442a --- /dev/null +++ b/src/string/HashedString.zig @@ -0,0 +1,43 @@ +const HashedString = @This(); +const bun = @import("root").bun; + +ptr: [*]const u8, +len: u32, +hash: u32, + +pub const empty = 
HashedString{ .ptr = @as([*]const u8, @ptrFromInt(0xDEADBEEF)), .len = 0, .hash = 0 }; + +pub fn init(buf: []const u8) HashedString { + return HashedString{ + .ptr = buf.ptr, + .len = @as(u32, @truncate(buf.len)), + .hash = @as(u32, @truncate(bun.hash(buf))), + }; +} + +pub fn initNoHash(buf: []const u8) HashedString { + return HashedString{ + .ptr = buf.ptr, + .len = @as(u32, @truncate(buf.len)), + .hash = 0, + }; +} + +pub fn eql(this: HashedString, other: anytype) bool { + return Eql(this, @TypeOf(other), other); +} + +fn Eql(this: HashedString, comptime Other: type, other: Other) bool { + switch (comptime Other) { + HashedString, *HashedString, *const HashedString => { + return ((@max(this.hash, other.hash) > 0 and this.hash == other.hash) or (this.ptr == other.ptr)) and this.len == other.len; + }, + else => { + return @as(usize, this.len) == other.len and @as(u32, @truncate(bun.hash(other[0..other.len]))) == this.hash; + }, + } +} + +pub fn str(this: HashedString) []const u8 { + return this.ptr[0..this.len]; +} diff --git a/src/string/MutableString.zig b/src/string/MutableString.zig new file mode 100644 index 0000000000..b5f9f75505 --- /dev/null +++ b/src/string/MutableString.zig @@ -0,0 +1,452 @@ +const std = @import("std"); +const bun = @import("root").bun; + +const Allocator = std.mem.Allocator; +const strings = bun.strings; +const js_lexer = bun.js_lexer; +const string = bun.string; +const stringZ = bun.stringZ; +const CodePoint = bun.CodePoint; + +const MutableString = @This(); + +allocator: Allocator, +list: std.ArrayListUnmanaged(u8), + +pub fn init2048(allocator: Allocator) Allocator.Error!MutableString { + return MutableString.init(allocator, 2048); +} + +pub fn clone(self: *MutableString) Allocator.Error!MutableString { + return MutableString.initCopy(self.allocator, self.list.items); +} + +pub const Writer = std.io.Writer(*@This(), Allocator.Error, MutableString.writeAll); +pub fn writer(self: *MutableString) Writer { + return Writer{ + .context = 
self, + }; +} + +pub fn isEmpty(this: *const MutableString) bool { + return this.list.items.len == 0; +} + +pub fn deinit(str: *MutableString) void { + if (str.list.capacity > 0) { + str.list.expandToCapacity(); + str.list.clearAndFree(str.allocator); + } +} + +pub fn owns(this: *const MutableString, items: []const u8) bool { + return bun.isSliceInBuffer(items, this.list.items.ptr[0..this.list.capacity]); +} + +pub fn growIfNeeded(self: *MutableString, amount: usize) Allocator.Error!void { + try self.list.ensureUnusedCapacity(self.allocator, amount); +} + +pub fn write(self: *MutableString, bytes: anytype) Allocator.Error!usize { + bun.debugAssert(bytes.len == 0 or !bun.isSliceInBuffer(bytes, self.list.allocatedSlice())); + try self.list.appendSlice(self.allocator, bytes); + return bytes.len; +} + +pub fn bufferedWriter(self: *MutableString) BufferedWriter { + return BufferedWriter{ .context = self }; +} + +pub fn init(allocator: Allocator, capacity: usize) Allocator.Error!MutableString { + return MutableString{ .allocator = allocator, .list = if (capacity > 0) + try std.ArrayListUnmanaged(u8).initCapacity(allocator, capacity) + else + std.ArrayListUnmanaged(u8){} }; +} + +pub fn initEmpty(allocator: Allocator) MutableString { + return MutableString{ .allocator = allocator, .list = .{} }; +} + +pub const ensureUnusedCapacity = growIfNeeded; + +pub fn initCopy(allocator: Allocator, str: anytype) Allocator.Error!MutableString { + var mutable = try MutableString.init(allocator, str.len); + try mutable.copy(str); + return mutable; +} + +/// Convert it to an ASCII identifier. Note: If you change this to a non-ASCII +/// identifier, you're going to potentially cause trouble with non-BMP code +/// points in target environments that don't support bracketed Unicode escapes. 
+pub fn ensureValidIdentifier(str: string, allocator: Allocator) Allocator.Error!string { + if (str.len == 0) { + return "_"; + } + + var iterator = strings.CodepointIterator.init(str); + var cursor = strings.CodepointIterator.Cursor{}; + + var has_needed_gap = false; + var needs_gap = false; + var start_i: usize = 0; + + if (!iterator.next(&cursor)) return "_"; + + const JSLexerTables = @import("../js_lexer_tables.zig"); + + // Common case: no gap necessary. No allocation necessary. + needs_gap = !js_lexer.isIdentifierStart(cursor.c); + if (!needs_gap) { + // Are there any non-alphanumeric chars at all? + while (iterator.next(&cursor)) { + if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) { + needs_gap = true; + start_i = cursor.i; + break; + } + } + } + + if (!needs_gap) { + return JSLexerTables.StrictModeReservedWordsRemap.get(str) orelse str; + } + + if (needs_gap) { + var mutable = try MutableString.initCopy(allocator, if (start_i == 0) + // the first letter can be a non-identifier start + // https://github.com/oven-sh/bun/issues/2946 + "_" + else + str[0..start_i]); + needs_gap = false; + + var items = str[start_i..]; + iterator = strings.CodepointIterator.init(items); + cursor = strings.CodepointIterator.Cursor{}; + + while (iterator.next(&cursor)) { + if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) { + if (needs_gap) { + try mutable.appendChar('_'); + needs_gap = false; + has_needed_gap = true; + } + try mutable.append(items[cursor.i .. 
cursor.i + @as(u32, cursor.width)]); + } else if (!needs_gap) { + needs_gap = true; + // skip the code point, replace it with a single _ + } + } + + // If it ends with an emoji + if (needs_gap) { + try mutable.appendChar('_'); + needs_gap = false; + has_needed_gap = true; + } + + if (comptime bun.Environment.allow_assert) { + bun.assert(js_lexer.isIdentifier(mutable.list.items)); + } + + return try mutable.list.toOwnedSlice(allocator); + } + + return str; +} + +pub fn len(self: *const MutableString) usize { + return self.list.items.len; +} + +pub fn copy(self: *MutableString, str: anytype) Allocator.Error!void { + try self.list.ensureTotalCapacity(self.allocator, str[0..].len); + + if (self.list.items.len == 0) { + try self.list.insertSlice(self.allocator, 0, str); + } else { + try self.list.replaceRange(self.allocator, 0, str[0..].len, str[0..]); + } +} + +pub inline fn growBy(self: *MutableString, amount: usize) Allocator.Error!void { + try self.list.ensureUnusedCapacity(self.allocator, amount); +} + +pub inline fn appendSlice(self: *MutableString, items: []const u8) Allocator.Error!void { + try self.list.appendSlice(self.allocator, items); +} + +pub inline fn appendSliceExact(self: *MutableString, items: []const u8) Allocator.Error!void { + if (items.len == 0) return; + try self.list.ensureTotalCapacityPrecise(self.allocator, self.list.items.len + items.len); + var end = self.list.items.ptr + self.list.items.len; + self.list.items.len += items.len; + @memcpy(end[0..items.len], items); +} + +pub inline fn reset( + self: *MutableString, +) void { + self.list.clearRetainingCapacity(); +} + +pub inline fn resetTo( + self: *MutableString, + index: usize, +) void { + bun.assert(index <= self.list.capacity); + self.list.items.len = index; +} + +pub fn inflate(self: *MutableString, amount: usize) Allocator.Error!void { + try self.list.resize(self.allocator, amount); +} + +pub inline fn appendCharNTimes(self: *MutableString, char: u8, n: usize) Allocator.Error!void { + 
try self.list.appendNTimes(self.allocator, char, n); +} + +pub inline fn appendChar(self: *MutableString, char: u8) Allocator.Error!void { + try self.list.append(self.allocator, char); +} +pub inline fn appendCharAssumeCapacity(self: *MutableString, char: u8) void { + self.list.appendAssumeCapacity(char); +} +pub inline fn append(self: *MutableString, char: []const u8) Allocator.Error!void { + try self.list.appendSlice(self.allocator, char); +} +pub inline fn appendInt(self: *MutableString, int: u64) Allocator.Error!void { + const count = bun.fmt.fastDigitCount(int); + try self.list.ensureUnusedCapacity(self.allocator, count); + const old = self.list.items.len; + self.list.items.len += count; + bun.assert(count == std.fmt.formatIntBuf(self.list.items.ptr[old .. old + count], int, 10, .lower, .{})); +} + +pub inline fn appendAssumeCapacity(self: *MutableString, char: []const u8) void { + self.list.appendSliceAssumeCapacity( + char, + ); +} +pub inline fn lenI(self: *MutableString) i32 { + return @as(i32, @intCast(self.list.items.len)); +} + +pub fn toOwnedSlice(self: *MutableString) string { + return self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO +} + +pub fn slice(self: *MutableString) []u8 { + return self.list.items; +} + +/// Clear the existing value without freeing the memory or shrinking the capacity. +pub fn move(self: *MutableString) []u8 { + const out = self.list.items; + self.list = .{}; + return out; +} + +/// Appends `0` if needed +pub fn sliceWithSentinel(self: *MutableString) [:0]u8 { + if (self.list.items.len > 0 and self.list.items[self.list.items.len - 1] != 0) { + self.list.append( + self.allocator, + 0, + ) catch unreachable; + } + + return self.list.items[0 .. 
self.list.items.len - 1 :0]; +} + +pub fn toOwnedSliceLength(self: *MutableString, length: usize) string { + self.list.shrinkAndFree(self.allocator, length); + return self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO +} + +pub fn containsChar(self: *const MutableString, char: u8) bool { + return self.indexOfChar(char) != null; +} + +pub fn indexOfChar(self: *const MutableString, char: u8) ?u32 { + return strings.indexOfChar(self.list.items, char); +} + +pub fn lastIndexOfChar(self: *const MutableString, char: u8) ?usize { + return strings.lastIndexOfChar(self.list.items, char); +} + +pub fn lastIndexOf(self: *const MutableString, str: u8) ?usize { + return strings.lastIndexOfChar(self.list.items, str); +} + +pub fn indexOf(self: *const MutableString, str: u8) ?usize { + return std.mem.indexOf(u8, self.list.items, str); +} + +pub fn eql(self: *MutableString, other: anytype) bool { + return std.mem.eql(u8, self.list.items, other); +} + +pub fn toSocketBuffers(self: *MutableString, comptime count: usize, ranges: anytype) [count]std.posix.iovec_const { + var buffers: [count]std.posix.iovec_const = undefined; + inline for (&buffers, ranges) |*b, r| { + b.* = .{ + .iov_base = self.list.items[r[0]..r[1]].ptr, + .iov_len = self.list.items[r[0]..r[1]].len, + }; + } + return buffers; +} + +pub const BufferedWriter = struct { + context: *MutableString, + buffer: [max]u8 = undefined, + pos: usize = 0, + + const max = 2048; + + pub const Writer = std.io.Writer(*BufferedWriter, Allocator.Error, BufferedWriter.writeAll); + + inline fn remain(this: *BufferedWriter) []u8 { + return this.buffer[this.pos..]; + } + + pub fn flush(this: *BufferedWriter) Allocator.Error!void { + _ = try this.context.writeAll(this.buffer[0..this.pos]); + this.pos = 0; + } + + pub fn writeAll(this: *BufferedWriter, bytes: []const u8) Allocator.Error!usize { + const pending = bytes; + + if (pending.len >= max) { + try this.flush(); + try this.context.append(pending); + return 
pending.len; + } + + if (pending.len > 0) { + if (pending.len + this.pos > max) { + try this.flush(); + } + @memcpy(this.remain()[0..pending.len], pending); + this.pos += pending.len; + } + + return pending.len; + } + + const E = bun.JSAst.E; + + /// Write a E.String to the buffer. + /// This automatically encodes UTF-16 into UTF-8 using + /// the same code path as TextEncoder + pub fn writeString(this: *BufferedWriter, bytes: *E.String) Allocator.Error!usize { + if (bytes.isUTF8()) { + return try this.writeAll(bytes.slice(this.context.allocator)); + } + + return try this.writeAll16(bytes.slice16()); + } + + /// Write a UTF-16 string to the (UTF-8) buffer + /// This automatically encodes UTF-16 into UTF-8 using + /// the same code path as TextEncoder + pub fn writeAll16(this: *BufferedWriter, bytes: []const u16) Allocator.Error!usize { + const pending = bytes; + + if (pending.len >= max) { + try this.flush(); + try this.context.list.ensureUnusedCapacity(this.context.allocator, bytes.len * 2); + const decoded = strings.copyUTF16IntoUTF8( + this.remain()[0 .. bytes.len * 2], + []const u16, + bytes, + true, + ); + this.context.list.items.len += @as(usize, decoded.written); + return pending.len; + } + + if (pending.len > 0) { + if ((pending.len * 2) + this.pos > max) { + try this.flush(); + } + const decoded = strings.copyUTF16IntoUTF8( + this.remain()[0 .. 
bytes.len * 2], + []const u16, + bytes, + true, + ); + this.pos += @as(usize, decoded.written); + } + + return pending.len; + } + + pub fn writeHTMLAttributeValueString(this: *BufferedWriter, str: *E.String) Allocator.Error!void { + if (str.isUTF8()) { + try this.writeHTMLAttributeValue(str.slice(this.context.allocator)); + return; + } + + try this.writeHTMLAttributeValue16(str.slice16()); + } + + pub fn writeHTMLAttributeValue(this: *BufferedWriter, bytes: []const u8) Allocator.Error!void { + var items = bytes; + while (items.len > 0) { + // TODO: SIMD + if (strings.indexOfAny(items, "\"<>")) |j| { + _ = try this.writeAll(items[0..j]); + _ = switch (items[j]) { + '"' => try this.writeAll("&quot;"), + '<' => try this.writeAll("&lt;"), + '>' => try this.writeAll("&gt;"), + else => unreachable, + }; + + items = items[j + 1 ..]; + continue; + } + + _ = try this.writeAll(items); + break; + } + } + + pub fn writeHTMLAttributeValue16(this: *BufferedWriter, bytes: []const u16) Allocator.Error!void { + var items = bytes; + while (items.len > 0) { + if (strings.indexOfAny16(items, "\"<>")) |j| { + // this won't handle strings larger than 4 GB + // that's fine though, 4 GB of SSR'd HTML is quite a lot... 
+ _ = try this.writeAll16(items[0..j]); + _ = switch (items[j]) { + '"' => try this.writeAll("&quot;"), + '<' => try this.writeAll("&lt;"), + '>' => try this.writeAll("&gt;"), + else => unreachable, + }; + + items = items[j + 1 ..]; + continue; + } + + _ = try this.writeAll16(items); + break; + } + } + + pub fn writer(this: *BufferedWriter) BufferedWriter.Writer { + return BufferedWriter.Writer{ .context = this }; + } +}; + +pub fn writeAll(self: *MutableString, bytes: string) Allocator.Error!usize { + try self.list.appendSlice(self.allocator, bytes); + return bytes.len; +} diff --git a/src/string/PathString.zig b/src/string/PathString.zig new file mode 100644 index 0000000000..5ea77ef53c --- /dev/null +++ b/src/string/PathString.zig @@ -0,0 +1,58 @@ +const std = @import("std"); +const bun = @import("root").bun; + +// macOS sets file path limit to 1024 +// Since a pointer on x64 is 64 bits and only 46 bits are used +// We can safely store the entire path slice in a single u64. +pub const PathString = packed struct { + const PathIntLen = std.math.IntFittingRange(0, bun.MAX_PATH_BYTES); + pub const use_small_path_string = @bitSizeOf(usize) - @bitSizeOf(PathIntLen) >= 53; + pub const PathInt = if (use_small_path_string) PathIntLen else usize; + pub const PointerIntType = if (use_small_path_string) u53 else usize; + ptr: PointerIntType = 0, + len: PathInt = 0, + + const JSC = bun.JSC; + + pub fn estimatedSize(this: *const PathString) usize { + return @as(usize, this.len); + } + + pub inline fn slice(this: anytype) []const u8 { + @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. + return @as([*]u8, @ptrFromInt(@as(usize, @intCast(this.ptr))))[0..this.len]; + } + + pub inline fn sliceAssumeZ(this: anytype) [:0]const u8 { + @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. 
+ return @as([*:0]u8, @ptrFromInt(@as(usize, @intCast(this.ptr))))[0..this.len :0]; + } + + pub inline fn init(str: []const u8) @This() { + @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. + + return .{ + .ptr = @as(PointerIntType, @truncate(@intFromPtr(str.ptr))), + .len = @as(PathInt, @truncate(str.len)), + }; + } + + pub inline fn isEmpty(this: anytype) bool { + return this.len == 0; + } + + pub fn format(self: PathString, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + try writer.writeAll(self.slice()); + } + + pub const empty = @This(){ .ptr = 0, .len = 0 }; + comptime { + if (!bun.Environment.isWasm) { + if (use_small_path_string and @bitSizeOf(@This()) != 64) { + @compileError("PathString must be 64 bits"); + } else if (!use_small_path_string and @bitSizeOf(@This()) != 128) { + @compileError("PathString must be 128 bits"); + } + } + } +}; diff --git a/src/string/SmolStr.zig b/src/string/SmolStr.zig new file mode 100644 index 0000000000..d6d9f5efca --- /dev/null +++ b/src/string/SmolStr.zig @@ -0,0 +1,208 @@ +const std = @import("std"); +const BabyList = @import("../baby_list.zig").BabyList; +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; + +/// This is a string type that stores up to 15 bytes inline on the stack, and heap allocates if it is longer +pub const SmolStr = packed struct { + __len: u32, + cap: u32, + __ptr: [*]u8, + + const Tag: usize = 0x8000000000000000; + const NegatedTag: usize = ~Tag; + + pub fn jsonStringify(self: *const SmolStr, writer: anytype) !void { + try writer.write(self.slice()); + } + + pub const Inlined = packed struct { + data: u120, + __len: u7, + _tag: u1, + + pub fn len(this: Inlined) u8 { + return @intCast(this.__len); + } + + pub fn setLen(this: *Inlined, new_len: u7) void { + this.__len = new_len; + } + + pub fn slice(this: *Inlined) []const u8 { + return this.allChars()[0..this.__len]; + } + + pub fn 
allChars(this: *Inlined) *[15]u8 { + return @as([*]u8, @ptrCast(@as(*u128, @ptrCast(this))))[0..15]; + } + }; + + comptime { + assert(@sizeOf(SmolStr) == @sizeOf(Inlined)); + } + + pub fn empty() SmolStr { + const inlined = Inlined{ + .data = 0, + .__len = 0, + ._tag = 1, + }; + return SmolStr.fromInlined(inlined); + } + + pub fn len(this: *const SmolStr) u32 { + if (this.isInlined()) { + return @intCast((@intFromPtr(this.__ptr) >> 56) & 0b01111111); + } + + return this.__len; + } + + pub fn ptr(this: *SmolStr) [*]u8 { + return @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) & NegatedTag); + } + + pub fn ptrConst(this: *const SmolStr) [*]const u8 { + return @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) & NegatedTag); + } + + pub fn markInlined(this: *SmolStr) void { + this.__ptr = @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) | Tag); + } + + pub fn markHeap(this: *SmolStr) void { + this.__ptr = @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) & NegatedTag); + } + + pub fn isInlined(this: *const SmolStr) bool { + return @as(usize, @intFromPtr(this.__ptr)) & Tag != 0; + } + + pub fn toInlined(this: *const SmolStr) Inlined { + var inlined: Inlined = @bitCast(@as(u128, @bitCast(this.*))); + inlined._tag = 1; + return inlined; + } + + pub fn fromBabyList(baby_list: BabyList(u8)) SmolStr { + var smol_str: SmolStr = .{ + .__len = baby_list.len, + .cap = baby_list.cap, + .__ptr = baby_list.ptr, + }; + smol_str.markHeap(); + return smol_str; + } + + pub fn fromInlined(inlined: Inlined) SmolStr { + var smol_str: SmolStr = @bitCast(inlined); + smol_str.markInlined(); + return smol_str; + } + + pub fn fromChar(char: u8) SmolStr { + var inlined = Inlined{ + .data = 0, + .__len = 1, + ._tag = 1, + }; + + inlined.allChars()[0] = char; + inlined.setLen(1); + + return SmolStr.fromInlined(inlined); + } + + pub fn fromSlice(allocator: Allocator, values: []const u8) Allocator.Error!SmolStr { + if (values.len > 15) { + var baby_list = try BabyList(u8).initCapacity(allocator, 
values.len); + baby_list.appendSliceAssumeCapacity(values); + return SmolStr.fromBabyList(baby_list); + } + + var inlined = Inlined{ + .data = 0, + .__len = 0, + ._tag = 1, + }; + + if (values.len > 0) { + @memcpy(inlined.allChars()[0..values.len], values[0..values.len]); + inlined.setLen(@intCast(values.len)); + } + + return SmolStr.fromInlined(inlined); + } + + pub fn slice(this: *const SmolStr) []const u8 { + if (this.isInlined()) { + const bytes: [*]const u8 = @ptrCast(this); + return bytes[0..this.len()]; + } + return this.ptrConst()[0..this.__len]; + } + + pub fn appendChar(this: *SmolStr, allocator: Allocator, char: u8) Allocator.Error!void { + if (this.isInlined()) { + var inlined = this.toInlined(); + if (inlined.len() + 1 > 15) { + var baby_list = try BabyList(u8).initCapacity(allocator, inlined.len() + 1); + baby_list.appendSliceAssumeCapacity(inlined.slice()); + try baby_list.push(allocator, char); + // this.* = SmolStr.fromBabyList(baby_list); + this.__len = baby_list.len; + this.__ptr = baby_list.ptr; + this.cap = baby_list.cap; + this.markHeap(); + return; + } + inlined.allChars()[inlined.len()] = char; + inlined.setLen(@intCast(inlined.len() + 1)); + // this.* = SmolStr.fromInlined(inlined); + this.* = @bitCast(inlined); + this.markInlined(); + return; + } + + var baby_list = BabyList(u8){ + .ptr = this.ptr(), + .len = this.__len, + .cap = this.cap, + }; + try baby_list.push(allocator, char); + + // this.* = SmolStr.fromBabyList(baby_list); + this.__len = baby_list.len; + this.__ptr = baby_list.ptr; + this.cap = baby_list.cap; + return; + } + + pub fn appendSlice(this: *SmolStr, allocator: Allocator, values: []const u8) Allocator.Error!void { + if (this.isInlined()) { + var inlined = this.toInlined(); + if (inlined.len() + values.len > 15) { + var baby_list = try BabyList(u8).initCapacity(allocator, inlined.len() + values.len); + baby_list.appendSliceAssumeCapacity(inlined.slice()); + baby_list.appendSliceAssumeCapacity(values); + this.* = 
SmolStr.fromBabyList(baby_list); + return; + } + @memcpy(inlined.allChars()[inlined.len() .. inlined.len() + values.len], values); + inlined.setLen(@intCast(inlined.len() + values.len)); + this.* = SmolStr.fromInlined(inlined); + return; + } + + var baby_list = BabyList(u8){ + .ptr = this.ptr(), + .len = this.__len, + .cap = this.cap, + }; + try baby_list.append(allocator, values); + + this.* = SmolStr.fromBabyList(baby_list); + return; + } +}; diff --git a/src/string_builder.zig b/src/string/StringBuilder.zig similarity index 86% rename from src/string_builder.zig rename to src/string/StringBuilder.zig index 0178663a4f..1f570efeda 100644 --- a/src/string_builder.zig +++ b/src/string/StringBuilder.zig @@ -1,9 +1,8 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const bun = @import("root").bun; -const Environment = bun.Environment; -const string = @import("string_types.zig").string; const StringBuilder = @This(); +const std = @import("std"); +const bun = @import("root").bun; +const Allocator = std.mem.Allocator; +const Environment = bun.Environment; const assert = bun.assert; const DebugHashTable = if (Environment.allow_assert) std.AutoHashMapUnmanaged(u64, void) else void; @@ -15,7 +14,7 @@ ptr: ?[*]u8 = null, pub fn initCapacity( allocator: std.mem.Allocator, cap: usize, -) !StringBuilder { +) Allocator.Error!StringBuilder { return StringBuilder{ .cap = cap, .len = 0, @@ -23,15 +22,15 @@ pub fn initCapacity( }; } -pub fn countZ(this: *StringBuilder, slice: string) void { +pub fn countZ(this: *StringBuilder, slice: []const u8) void { this.cap += slice.len + 1; } -pub fn count(this: *StringBuilder, slice: string) void { +pub fn count(this: *StringBuilder, slice: []const u8) void { this.cap += slice.len; } -pub fn allocate(this: *StringBuilder, allocator: Allocator) !void { +pub fn allocate(this: *StringBuilder, allocator: Allocator) Allocator.Error!void { const slice = try allocator.alloc(u8, this.cap); this.ptr = slice.ptr; this.len = 0; @@ 
-73,7 +72,7 @@ pub fn append16(this: *StringBuilder, slice: []const u16, fallback_allocator: st } } -pub fn appendZ(this: *StringBuilder, slice: string) [:0]const u8 { +pub fn appendZ(this: *StringBuilder, slice: []const u8) [:0]const u8 { if (comptime Environment.allow_assert) { assert(this.len + 1 <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -89,13 +88,13 @@ pub fn appendZ(this: *StringBuilder, slice: string) [:0]const u8 { return result; } -pub fn appendStr(this: *StringBuilder, str: bun.String) string { +pub fn appendStr(this: *StringBuilder, str: bun.String) []const u8 { const slice = str.toUTF8(bun.default_allocator); defer slice.deinit(); return this.append(slice.slice()); } -pub fn append(this: *StringBuilder, slice: string) string { +pub fn append(this: *StringBuilder, slice: []const u8) []const u8 { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -110,7 +109,7 @@ pub fn append(this: *StringBuilder, slice: string) string { return result; } -pub fn addConcat(this: *StringBuilder, slices: []const string) bun.StringPointer { +pub fn addConcat(this: *StringBuilder, slices: []const []const u8) bun.StringPointer { var remain = this.allocatedSlice()[this.len..]; var len: usize = 0; for (slices) |slice| { @@ -134,7 +133,7 @@ pub fn add(this: *StringBuilder, len: usize) bun.StringPointer { return bun.StringPointer{ .offset = @as(u32, @truncate(start)), .length = @as(u32, @truncate(len)) }; } -pub fn appendCount(this: *StringBuilder, slice: string) bun.StringPointer { +pub fn appendCount(this: *StringBuilder, slice: []const u8) bun.StringPointer { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -151,7 +150,7 @@ pub fn appendCount(this: *StringBuilder, slice: string) bun.StringPointer { return 
bun.StringPointer{ .offset = @as(u32, @truncate(start)), .length = @as(u32, @truncate(slice.len)) }; } -pub fn appendCountZ(this: *StringBuilder, slice: string) bun.StringPointer { +pub fn appendCountZ(this: *StringBuilder, slice: []const u8) bun.StringPointer { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -170,7 +169,7 @@ pub fn appendCountZ(this: *StringBuilder, slice: string) bun.StringPointer { return bun.StringPointer{ .offset = @as(u32, @truncate(start)), .length = @as(u32, @truncate(slice.len)) }; } -pub fn fmt(this: *StringBuilder, comptime str: string, args: anytype) string { +pub fn fmt(this: *StringBuilder, comptime str: []const u8, args: anytype) []const u8 { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -185,7 +184,7 @@ pub fn fmt(this: *StringBuilder, comptime str: string, args: anytype) string { return out; } -pub fn fmtAppendCount(this: *StringBuilder, comptime str: string, args: anytype) bun.StringPointer { +pub fn fmtAppendCount(this: *StringBuilder, comptime str: []const u8, args: anytype) bun.StringPointer { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -204,7 +203,7 @@ pub fn fmtAppendCount(this: *StringBuilder, comptime str: string, args: anytype) }; } -pub fn fmtAppendCountZ(this: *StringBuilder, comptime str: string, args: anytype) bun.StringPointer { +pub fn fmtAppendCountZ(this: *StringBuilder, comptime str: []const u8, args: anytype) bun.StringPointer { if (comptime Environment.allow_assert) { assert(this.len <= this.cap); // didn't count everything assert(this.ptr != null); // must call allocate first @@ -224,7 +223,7 @@ pub fn fmtAppendCountZ(this: *StringBuilder, comptime str: string, args: anytype }; } -pub fn 
fmtCount(this: *StringBuilder, comptime str: string, args: anytype) void { +pub fn fmtCount(this: *StringBuilder, comptime str: []const u8, args: anytype) void { this.cap += std.fmt.count(str, args); } diff --git a/src/StringJoiner.zig b/src/string/StringJoiner.zig similarity index 97% rename from src/StringJoiner.zig rename to src/string/StringJoiner.zig index 31af5933af..58e421bc8c 100644 --- a/src/StringJoiner.zig +++ b/src/string/StringJoiner.zig @@ -1,12 +1,11 @@ //! Rope-like data structure for joining many small strings into one big string. //! Implemented as a linked list of potentially-owned slices and a length. +const StringJoiner = @This(); const std = @import("std"); const default_allocator = bun.default_allocator; const bun = @import("root").bun; -const string = bun.string; const Allocator = std.mem.Allocator; const NullableAllocator = bun.NullableAllocator; -const StringJoiner = @This(); const assert = bun.assert; /// Temporary allocator used for nodes and duplicated strings. @@ -154,7 +153,7 @@ pub fn ensureNewlineAtEnd(this: *StringJoiner) void { } } -pub fn contains(this: *const StringJoiner, slice: string) bool { +pub fn contains(this: *const StringJoiner, slice: []const u8) bool { var el = this.head; while (el) |node| { el = node.next; diff --git a/src/string/WTFStringImpl.zig b/src/string/WTFStringImpl.zig new file mode 100644 index 0000000000..cef71d7e3e --- /dev/null +++ b/src/string/WTFStringImpl.zig @@ -0,0 +1,268 @@ +const std = @import("std"); +const bun = @import("root").bun; +const JSC = bun.JSC; +const OOM = bun.OOM; + +pub const WTFStringImpl = *WTFStringImplStruct; +const ZigString = bun.JSC.ZigString; + +pub const WTFStringImplStruct = extern struct { + m_refCount: u32 = 0, + m_length: u32 = 0, + m_ptr: extern union { latin1: [*]const u8, utf16: [*]const u16 }, + m_hashAndFlags: u32 = 0, + + // --------------------------------------------------------------------- + // These details must stay in sync with WTFStringImpl.h in WebKit! 
+    // ---------------------------------------------------------------------
+    const s_flagCount: u32 = 8;
+
+    const s_flagMask: u32 = (1 << s_flagCount) - 1;
+    const s_flagStringKindCount: u32 = 4;
+    const s_hashZeroValue: u32 = 0;
+    const s_hashFlagStringKindIsAtom: u32 = @as(u32, 1) << (s_flagStringKindCount); // bit 4
+    const s_hashFlagStringKindIsSymbol: u32 = @as(u32, 1) << (s_flagStringKindCount + 1); // bit 5
+    const s_hashMaskStringKind: u32 = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol;
+    const s_hashFlagDidReportCost: u32 = @as(u32, 1) << 3;
+    const s_hashFlag8BitBuffer: u32 = 1 << 2;
+    const s_hashMaskBufferOwnership: u32 = (1 << 0) | (1 << 1);
+
+    /// The bottom bit in the ref count indicates a static (immortal) string.
+    const s_refCountFlagIsStaticString = 0x1;
+
+    /// This allows us to ref / deref without disturbing the static string flag.
+    const s_refCountIncrement = 0x2;
+
+    // ---------------------------------------------------------------------
+
+    pub fn refCount(this: WTFStringImpl) u32 {
+        return this.m_refCount / s_refCountIncrement; // the division discards the static-flag bit
+    }
+
+    pub fn memoryCost(this: WTFStringImpl) usize {
+        return this.byteLength();
+    }
+
+    pub fn isStatic(this: WTFStringImpl) bool {
+        return this.m_refCount & s_refCountFlagIsStaticString != 0; // test the flag bit, not the increment
+    }
+
+    pub fn byteLength(this: WTFStringImpl) usize {
+        return if (this.is8Bit()) this.m_length else this.m_length * 2; // 16-bit buffers: two bytes per code unit
+    }
+
+    extern fn WTFStringImpl__isThreadSafe(WTFStringImpl) bool;
+    pub fn isThreadSafe(this: WTFStringImpl) bool {
+        return WTFStringImpl__isThreadSafe(this);
+    }
+
+    pub fn byteSlice(this: WTFStringImpl) []const u8 {
+        return this.m_ptr.latin1[0..this.byteLength()]; // raw bytes of the buffer, whatever its width
+    }
+
+    pub inline fn is8Bit(self: WTFStringImpl) bool {
+        return (self.m_hashAndFlags & s_hashFlag8BitBuffer) != 0;
+    }
+
+    pub inline fn length(self: WTFStringImpl) u32 {
+        return self.m_length;
+    }
+
+    pub inline fn utf16Slice(self: WTFStringImpl) []const u16 {
+        bun.assert(!is8Bit(self));
+        return self.m_ptr.utf16[0..length(self)];
+    }
+
+    pub
inline fn latin1Slice(self: WTFStringImpl) []const u8 { + bun.assert(is8Bit(self)); + return self.m_ptr.latin1[0..length(self)]; + } + + /// Caller must ensure that the string is 8-bit and ASCII. + pub inline fn utf8Slice(self: WTFStringImpl) []const u8 { + if (comptime bun.Environment.allow_assert) + bun.assert(canUseAsUTF8(self)); + return self.m_ptr.latin1[0..length(self)]; + } + + pub fn toZigString(this: WTFStringImpl) ZigString { + if (this.is8Bit()) { + return ZigString.init(this.latin1Slice()); + } else { + return ZigString.initUTF16(this.utf16Slice()); + } + } + + pub inline fn deref(self: WTFStringImpl) void { + JSC.markBinding(@src()); + const current_count = self.refCount(); + bun.assert(current_count > 0); + Bun__WTFStringImpl__deref(self); + if (comptime bun.Environment.allow_assert) { + if (current_count > 1) { + bun.assert(self.refCount() < current_count or self.isStatic()); + } + } + } + + pub inline fn ref(self: WTFStringImpl) void { + JSC.markBinding(@src()); + const current_count = self.refCount(); + bun.assert(current_count > 0); + Bun__WTFStringImpl__ref(self); + bun.assert(self.refCount() > current_count or self.isStatic()); + } + + pub fn toLatin1Slice(this: WTFStringImpl) ZigString.Slice { + this.ref(); + return ZigString.Slice.init(this.refCountAllocator(), this.latin1Slice()); + } + + extern fn Bun__WTFStringImpl__ensureHash(this: WTFStringImpl) void; + /// Compute the hash() if necessary + pub fn ensureHash(this: WTFStringImpl) void { + JSC.markBinding(@src()); + Bun__WTFStringImpl__ensureHash(this); + } + + pub fn toUTF8(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice { + if (this.is8Bit()) { + if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { + return ZigString.Slice.init(allocator, utf8.items); + } + + return this.toLatin1Slice(); + } + + return ZigString.Slice.init( + allocator, + bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), + ); + } 
+ + pub const max = std.math.maxInt(u32); + + pub fn toUTF8WithoutRef(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice { + if (this.is8Bit()) { + if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { + return ZigString.Slice.init(allocator, utf8.items); + } + + return ZigString.Slice.fromUTF8NeverFree(this.latin1Slice()); + } + + return ZigString.Slice.init( + allocator, + bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), + ); + } + + pub fn toOwnedSliceZ(this: WTFStringImpl, allocator: std.mem.Allocator) [:0]u8 { + if (this.is8Bit()) { + if (bun.strings.toUTF8FromLatin1Z(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { + return utf8.items[0 .. utf8.items.len - 1 :0]; + } + + return allocator.dupeZ(u8, this.latin1Slice()) catch bun.outOfMemory(); + } + return bun.strings.toUTF8AllocZ(allocator, this.utf16Slice()) catch bun.outOfMemory(); + } + + pub fn toUTF8IfNeeded(this: WTFStringImpl, allocator: std.mem.Allocator) ?ZigString.Slice { + if (this.is8Bit()) { + if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch bun.outOfMemory()) |utf8| { + return ZigString.Slice.init(allocator, utf8.items); + } + + return null; + } + + return ZigString.Slice.init( + allocator, + bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch bun.outOfMemory(), + ); + } + + /// Avoid using this in code paths that are about to get the string as a UTF-8 + /// In that case, use toUTF8IfNeeded instead. 
+    pub fn canUseAsUTF8(this: WTFStringImpl) bool {
+        return this.is8Bit() and bun.strings.isAllASCII(this.latin1Slice());
+    }
+
+    pub fn utf16ByteLength(this: WTFStringImpl) usize {
+        // Latin-1 and UTF-16 buffers both hold `length()` code units, and
+        // every one of them occupies two bytes once encoded as UTF-16
+        // (Latin-1 only covers U+0000..U+00FF, so it never needs surrogates).
+        // Widen before multiplying so the product is computed as a usize.
+        return @as(usize, this.length()) * 2;
+    }
+
+    pub fn utf8ByteLength(this: WTFStringImpl) usize {
+        if (this.is8Bit()) {
+            const input = this.latin1Slice();
+            return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU8(input.ptr, input.len, .utf8) else 0;
+        } else {
+            const input = this.utf16Slice();
+            return if (input.len > 0) JSC.WebCore.Encoder.byteLengthU16(input.ptr, input.len, .utf8) else 0;
+        }
+    }
+
+    pub fn latin1ByteLength(this: WTFStringImpl) usize {
+        // Not all UTF-16 characters are representable in latin1.
+        // Those get truncated?
+        return this.length();
+    }
+
+    pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator {
+        return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr };
+    }
+
+    pub fn hasPrefix(self: WTFStringImpl, text: []const u8) bool {
+        return Bun__WTFStringImpl__hasPrefix(self, text.ptr, text.len);
+    }
+
+    extern fn Bun__WTFStringImpl__deref(self: WTFStringImpl) void;
+    extern fn Bun__WTFStringImpl__ref(self: WTFStringImpl) void;
+    extern fn Bun__WTFStringImpl__hasPrefix(self: *const WTFStringImplStruct, offset: [*]const u8, length: usize) bool;
+};
+
+pub const StringImplAllocator = struct {
+    fn alloc(ptr: *anyopaque, len: usize, _: u8, _: usize) ?[*]u8 {
+        var this = bun.cast(WTFStringImpl, ptr);
+        const len_ = this.byteLength();
+
+        if (len_ != len) {
+            // we don't actually allocate, we just reference count
+            return null;
+        }
+
+        this.ref();
+
+        // we should never actually allocate
+        return @constCast(this.m_ptr.latin1);
+    }
+
+    fn resize(_: *anyopaque, _: []u8, _: u8, _: usize, _: usize) bool {
+        return false;
+    }
+
+    pub fn free(
+        ptr: *anyopaque,
+        buf: []u8,
+        _: u8,
+        _: usize,
+    ) void {
+        var this = bun.cast(WTFStringImpl, ptr);
+
bun.assert(this.latin1Slice().ptr == buf.ptr); + bun.assert(this.latin1Slice().len == buf.len); + this.deref(); + } + + pub const VTable = std.mem.Allocator.VTable{ + .alloc = &alloc, + .resize = &resize, + .free = &free, + }; + + pub const VTablePtr = &VTable; +}; diff --git a/src/string_mutable.zig b/src/string_mutable.zig deleted file mode 100644 index 6e01507ada..0000000000 --- a/src/string_mutable.zig +++ /dev/null @@ -1,455 +0,0 @@ -const std = @import("std"); -const expect = std.testing.expect; - -const bun = @import("root").bun; - -const strings = bun.strings; -const js_lexer = bun.js_lexer; - -const string = bun.string; -const stringZ = bun.stringZ; -const CodePoint = bun.CodePoint; -const OOM = bun.OOM; - -pub const MutableString = struct { - allocator: std.mem.Allocator, - list: std.ArrayListUnmanaged(u8), - - pub fn init2048(allocator: std.mem.Allocator) std.mem.Allocator.Error!MutableString { - return MutableString.init(allocator, 2048); - } - - pub fn clone(self: *MutableString) !MutableString { - return MutableString.initCopy(self.allocator, self.list.items); - } - - pub const Writer = std.io.Writer(*@This(), OOM, MutableString.writeAll); - pub fn writer(self: *MutableString) Writer { - return Writer{ - .context = self, - }; - } - - pub fn isEmpty(this: *const MutableString) bool { - return this.list.items.len == 0; - } - - pub fn deinit(str: *MutableString) void { - if (str.list.capacity > 0) { - str.list.expandToCapacity(); - str.list.clearAndFree(str.allocator); - } - } - - pub fn owns(this: *const MutableString, items: []const u8) bool { - return bun.isSliceInBuffer(items, this.list.items.ptr[0..this.list.capacity]); - } - - pub fn growIfNeeded(self: *MutableString, amount: usize) OOM!void { - try self.list.ensureUnusedCapacity(self.allocator, amount); - } - - pub fn write(self: *MutableString, bytes: anytype) OOM!usize { - bun.debugAssert(bytes.len == 0 or !bun.isSliceInBuffer(bytes, self.list.allocatedSlice())); - try 
self.list.appendSlice(self.allocator, bytes); - return bytes.len; - } - - pub fn bufferedWriter(self: *MutableString) BufferedWriter { - return BufferedWriter{ .context = self }; - } - - pub fn init(allocator: std.mem.Allocator, capacity: usize) OOM!MutableString { - return MutableString{ .allocator = allocator, .list = if (capacity > 0) - try std.ArrayListUnmanaged(u8).initCapacity(allocator, capacity) - else - std.ArrayListUnmanaged(u8){} }; - } - - pub fn initEmpty(allocator: std.mem.Allocator) MutableString { - return MutableString{ .allocator = allocator, .list = .{} }; - } - - pub const ensureUnusedCapacity = growIfNeeded; - - pub fn initCopy(allocator: std.mem.Allocator, str: anytype) OOM!MutableString { - var mutable = try MutableString.init(allocator, str.len); - try mutable.copy(str); - return mutable; - } - - /// Convert it to an ASCII identifier. Note: If you change this to a non-ASCII - /// identifier, you're going to potentially cause trouble with non-BMP code - /// points in target environments that don't support bracketed Unicode escapes. - pub fn ensureValidIdentifier(str: string, allocator: std.mem.Allocator) !string { - if (str.len == 0) { - return "_"; - } - - var iterator = strings.CodepointIterator.init(str); - var cursor = strings.CodepointIterator.Cursor{}; - - var has_needed_gap = false; - var needs_gap = false; - var start_i: usize = 0; - - if (!iterator.next(&cursor)) return "_"; - - const JSLexerTables = @import("./js_lexer_tables.zig"); - - // Common case: no gap necessary. No allocation necessary. - needs_gap = !js_lexer.isIdentifierStart(cursor.c); - if (!needs_gap) { - // Are there any non-alphanumeric chars at all? 
- while (iterator.next(&cursor)) { - if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) { - needs_gap = true; - start_i = cursor.i; - break; - } - } - } - - if (!needs_gap) { - return JSLexerTables.StrictModeReservedWordsRemap.get(str) orelse str; - } - - if (needs_gap) { - var mutable = try MutableString.initCopy(allocator, if (start_i == 0) - // the first letter can be a non-identifier start - // https://github.com/oven-sh/bun/issues/2946 - "_" - else - str[0..start_i]); - needs_gap = false; - - var items = str[start_i..]; - iterator = strings.CodepointIterator.init(items); - cursor = strings.CodepointIterator.Cursor{}; - - while (iterator.next(&cursor)) { - if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) { - if (needs_gap) { - try mutable.appendChar('_'); - needs_gap = false; - has_needed_gap = true; - } - try mutable.append(items[cursor.i .. cursor.i + @as(u32, cursor.width)]); - } else if (!needs_gap) { - needs_gap = true; - // skip the code point, replace it with a single _ - } - } - - // If it ends with an emoji - if (needs_gap) { - try mutable.appendChar('_'); - needs_gap = false; - has_needed_gap = true; - } - - if (comptime bun.Environment.allow_assert) { - bun.assert(js_lexer.isIdentifier(mutable.list.items)); - } - - return try mutable.list.toOwnedSlice(allocator); - } - - return str; - } - - pub fn len(self: *const MutableString) usize { - return self.list.items.len; - } - - pub fn copy(self: *MutableString, str: anytype) !void { - try self.list.ensureTotalCapacity(self.allocator, str[0..].len); - - if (self.list.items.len == 0) { - try self.list.insertSlice(self.allocator, 0, str); - } else { - try self.list.replaceRange(self.allocator, 0, str[0..].len, str[0..]); - } - } - - pub inline fn growBy(self: *MutableString, amount: usize) !void { - try self.list.ensureUnusedCapacity(self.allocator, amount); - } - - pub inline fn appendSlice(self: *MutableString, items: []const u8) !void { - try 
self.list.appendSlice(self.allocator, items); - } - - pub inline fn appendSliceExact(self: *MutableString, items: []const u8) !void { - if (items.len == 0) return; - try self.list.ensureTotalCapacityPrecise(self.allocator, self.list.items.len + items.len); - var end = self.list.items.ptr + self.list.items.len; - self.list.items.len += items.len; - @memcpy(end[0..items.len], items); - } - - pub inline fn reset( - self: *MutableString, - ) void { - self.list.clearRetainingCapacity(); - } - - pub inline fn resetTo( - self: *MutableString, - index: usize, - ) void { - bun.assert(index <= self.list.capacity); - self.list.items.len = index; - } - - pub fn inflate(self: *MutableString, amount: usize) !void { - try self.list.resize(self.allocator, amount); - } - - pub inline fn appendCharNTimes(self: *MutableString, char: u8, n: usize) !void { - try self.list.appendNTimes(self.allocator, char, n); - } - - pub inline fn appendChar(self: *MutableString, char: u8) !void { - try self.list.append(self.allocator, char); - } - pub inline fn appendCharAssumeCapacity(self: *MutableString, char: u8) void { - self.list.appendAssumeCapacity(char); - } - pub inline fn append(self: *MutableString, char: []const u8) !void { - try self.list.appendSlice(self.allocator, char); - } - pub inline fn appendInt(self: *MutableString, int: u64) !void { - const count = bun.fmt.fastDigitCount(int); - try self.list.ensureUnusedCapacity(self.allocator, count); - const old = self.list.items.len; - self.list.items.len += count; - bun.assert(count == std.fmt.formatIntBuf(self.list.items.ptr[old .. 
old + count], int, 10, .lower, .{})); - } - - pub inline fn appendAssumeCapacity(self: *MutableString, char: []const u8) void { - self.list.appendSliceAssumeCapacity( - char, - ); - } - pub inline fn lenI(self: *MutableString) i32 { - return @as(i32, @intCast(self.list.items.len)); - } - - pub fn toOwnedSlice(self: *MutableString) string { - return self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO - } - - pub fn slice(self: *MutableString) []u8 { - return self.list.items; - } - - /// Clear the existing value without freeing the memory or shrinking the capacity. - pub fn move(self: *MutableString) []u8 { - const out = self.list.items; - self.list = .{}; - return out; - } - - /// Appends `0` if needed - pub fn sliceWithSentinel(self: *MutableString) [:0]u8 { - if (self.list.items.len > 0 and self.list.items[self.list.items.len - 1] != 0) { - self.list.append( - self.allocator, - 0, - ) catch unreachable; - } - - return self.list.items[0 .. self.list.items.len - 1 :0]; - } - - pub fn toOwnedSliceLength(self: *MutableString, length: usize) string { - self.list.shrinkAndFree(self.allocator, length); - return self.list.toOwnedSlice(self.allocator) catch bun.outOfMemory(); // TODO - } - - pub fn containsChar(self: *const MutableString, char: u8) bool { - return self.indexOfChar(char) != null; - } - - pub fn indexOfChar(self: *const MutableString, char: u8) ?u32 { - return strings.indexOfChar(self.list.items, char); - } - - pub fn lastIndexOfChar(self: *const MutableString, char: u8) ?usize { - return strings.lastIndexOfChar(self.list.items, char); - } - - pub fn lastIndexOf(self: *const MutableString, str: u8) ?usize { - return strings.lastIndexOfChar(self.list.items, str); - } - - pub fn indexOf(self: *const MutableString, str: u8) ?usize { - return std.mem.indexOf(u8, self.list.items, str); - } - - pub fn eql(self: *MutableString, other: anytype) bool { - return std.mem.eql(u8, self.list.items, other); - } - - pub fn toSocketBuffers(self: 
*MutableString, comptime count: usize, ranges: anytype) [count]std.posix.iovec_const { - var buffers: [count]std.posix.iovec_const = undefined; - inline for (&buffers, ranges) |*b, r| { - b.* = .{ - .iov_base = self.list.items[r[0]..r[1]].ptr, - .iov_len = self.list.items[r[0]..r[1]].len, - }; - } - return buffers; - } - - pub const BufferedWriter = struct { - context: *MutableString, - buffer: [max]u8 = undefined, - pos: usize = 0, - - const max = 2048; - - pub const Writer = std.io.Writer(*BufferedWriter, OOM, BufferedWriter.writeAll); - - inline fn remain(this: *BufferedWriter) []u8 { - return this.buffer[this.pos..]; - } - - pub fn flush(this: *BufferedWriter) OOM!void { - _ = try this.context.writeAll(this.buffer[0..this.pos]); - this.pos = 0; - } - - pub fn writeAll(this: *BufferedWriter, bytes: []const u8) OOM!usize { - const pending = bytes; - - if (pending.len >= max) { - try this.flush(); - try this.context.append(pending); - return pending.len; - } - - if (pending.len > 0) { - if (pending.len + this.pos > max) { - try this.flush(); - } - @memcpy(this.remain()[0..pending.len], pending); - this.pos += pending.len; - } - - return pending.len; - } - - const E = bun.JSAst.E; - - /// Write a E.String to the buffer. 
- /// This automatically encodes UTF-16 into UTF-8 using - /// the same code path as TextEncoder - pub fn writeString(this: *BufferedWriter, bytes: *E.String) OOM!usize { - if (bytes.isUTF8()) { - return try this.writeAll(bytes.slice(this.context.allocator)); - } - - return try this.writeAll16(bytes.slice16()); - } - - /// Write a UTF-16 string to the (UTF-8) buffer - /// This automatically encodes UTF-16 into UTF-8 using - /// the same code path as TextEncoder - pub fn writeAll16(this: *BufferedWriter, bytes: []const u16) OOM!usize { - const pending = bytes; - - if (pending.len >= max) { - try this.flush(); - try this.context.list.ensureUnusedCapacity(this.context.allocator, bytes.len * 2); - const decoded = strings.copyUTF16IntoUTF8( - this.remain()[0 .. bytes.len * 2], - []const u16, - bytes, - true, - ); - this.context.list.items.len += @as(usize, decoded.written); - return pending.len; - } - - if (pending.len > 0) { - if ((pending.len * 2) + this.pos > max) { - try this.flush(); - } - const decoded = strings.copyUTF16IntoUTF8( - this.remain()[0 .. 
bytes.len * 2], - []const u16, - bytes, - true, - ); - this.pos += @as(usize, decoded.written); - } - - return pending.len; - } - - pub fn writeHTMLAttributeValueString(this: *BufferedWriter, str: *E.String) OOM!void { - if (str.isUTF8()) { - try this.writeHTMLAttributeValue(str.slice(this.context.allocator)); - return; - } - - try this.writeHTMLAttributeValue16(str.slice16()); - } - - pub fn writeHTMLAttributeValue(this: *BufferedWriter, bytes: []const u8) OOM!void { - var items = bytes; - while (items.len > 0) { - // TODO: SIMD - if (strings.indexOfAny(items, "\"<>")) |j| { - _ = try this.writeAll(items[0..j]); - _ = switch (items[j]) { - '"' => try this.writeAll("""), - '<' => try this.writeAll("<"), - '>' => try this.writeAll(">"), - else => unreachable, - }; - - items = items[j + 1 ..]; - continue; - } - - _ = try this.writeAll(items); - break; - } - } - - pub fn writeHTMLAttributeValue16(this: *BufferedWriter, bytes: []const u16) OOM!void { - var items = bytes; - while (items.len > 0) { - if (strings.indexOfAny16(items, "\"<>")) |j| { - // this won't handle strings larger than 4 GB - // that's fine though, 4 GB of SSR'd HTML is quite a lot... 
- _ = try this.writeAll16(items[0..j]); - _ = switch (items[j]) { - '"' => try this.writeAll("""), - '<' => try this.writeAll("<"), - '>' => try this.writeAll(">"), - else => unreachable, - }; - - items = items[j + 1 ..]; - continue; - } - - _ = try this.writeAll16(items); - break; - } - } - - pub fn writer(this: *BufferedWriter) BufferedWriter.Writer { - return BufferedWriter.Writer{ .context = this }; - } - }; - - pub fn writeAll(self: *MutableString, bytes: string) OOM!usize { - try self.list.appendSlice(self.allocator, bytes); - return bytes.len; - } -}; diff --git a/src/string_types.zig b/src/string_types.zig index 7e6a8f9767..c3f615d3c2 100644 --- a/src/string_types.zig +++ b/src/string_types.zig @@ -1,309 +1,4 @@ -const std = @import("std"); -const BabyList = @import("./baby_list.zig").BabyList; pub const string = []const u8; pub const stringZ = [:0]const u8; pub const stringMutable = []u8; pub const CodePoint = i32; -const bun = @import("root").bun; -// macOS sets file path limit to 1024 -// Since a pointer on x64 is 64 bits and only 46 bits are used -// We can safely store the entire path slice in a single u64. -pub const PathString = packed struct { - const PathIntLen = std.math.IntFittingRange(0, bun.MAX_PATH_BYTES); - pub const use_small_path_string = @bitSizeOf(usize) - @bitSizeOf(PathIntLen) >= 53; - pub const PathInt = if (use_small_path_string) PathIntLen else usize; - pub const PointerIntType = if (use_small_path_string) u53 else usize; - ptr: PointerIntType = 0, - len: PathInt = 0, - - const JSC = bun.JSC; - - pub fn estimatedSize(this: *const PathString) usize { - return @as(usize, this.len); - } - - pub inline fn slice(this: anytype) string { - @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. 
- return @as([*]u8, @ptrFromInt(@as(usize, @intCast(this.ptr))))[0..this.len]; - } - - pub inline fn sliceAssumeZ(this: anytype) stringZ { - @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. - return @as([*:0]u8, @ptrFromInt(@as(usize, @intCast(this.ptr))))[0..this.len :0]; - } - - pub inline fn init(str: string) @This() { - @setRuntimeSafety(false); // "cast causes pointer to be null" is fine here. if it is null, the len will be 0. - - return .{ - .ptr = @as(PointerIntType, @truncate(@intFromPtr(str.ptr))), - .len = @as(PathInt, @truncate(str.len)), - }; - } - - pub inline fn isEmpty(this: anytype) bool { - return this.len == 0; - } - - pub fn format(self: PathString, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { - try writer.writeAll(self.slice()); - } - - pub const empty = @This(){ .ptr = 0, .len = 0 }; - comptime { - if (!bun.Environment.isWasm) { - if (use_small_path_string and @bitSizeOf(@This()) != 64) { - @compileError("PathString must be 64 bits"); - } else if (!use_small_path_string and @bitSizeOf(@This()) != 128) { - @compileError("PathString must be 128 bits"); - } - } - } -}; - -pub const HashedString = struct { - ptr: [*]const u8, - len: u32, - hash: u32, - - pub const empty = HashedString{ .ptr = @as([*]const u8, @ptrFromInt(0xDEADBEEF)), .len = 0, .hash = 0 }; - - pub fn init(buf: string) HashedString { - return HashedString{ - .ptr = buf.ptr, - .len = @as(u32, @truncate(buf.len)), - .hash = @as(u32, @truncate(bun.hash(buf))), - }; - } - - pub fn initNoHash(buf: string) HashedString { - return HashedString{ - .ptr = buf.ptr, - .len = @as(u32, @truncate(buf.len)), - .hash = 0, - }; - } - - pub fn eql(this: HashedString, other: anytype) bool { - return Eql(this, @TypeOf(other), other); - } - - pub fn Eql(this: HashedString, comptime Other: type, other: Other) bool { - switch (comptime Other) { - HashedString, *HashedString, *const HashedString => { - return 
((@max(this.hash, other.hash) > 0 and this.hash == other.hash) or (this.ptr == other.ptr)) and this.len == other.len; - }, - else => { - return @as(usize, this.len) == other.len and @as(u32, @truncate(bun.hash(other[0..other.len]))) == this.hash; - }, - } - } - - pub fn str(this: HashedString) string { - return this.ptr[0..this.len]; - } -}; - -/// This is a string type that stores up to 15 bytes inline on the stack, and heap allocates if it is longer -pub const SmolStr = packed struct { - __len: u32, - cap: u32, - __ptr: [*]u8, - - const Tag: usize = 0x8000000000000000; - const NegatedTag: usize = ~Tag; - - pub fn jsonStringify(self: *const SmolStr, writer: anytype) !void { - try writer.write(self.slice()); - } - - pub const Inlined = packed struct { - data: u120, - __len: u7, - _tag: u1, - - pub fn len(this: Inlined) u8 { - return @intCast(this.__len); - } - - pub fn setLen(this: *Inlined, new_len: u7) void { - this.__len = new_len; - } - - pub fn slice(this: *Inlined) []const u8 { - return this.allChars()[0..this.__len]; - } - - pub fn allChars(this: *Inlined) *[15]u8 { - return @as([*]u8, @ptrCast(@as(*u128, @ptrCast(this))))[0..15]; - } - }; - - comptime { - bun.assert(@sizeOf(SmolStr) == @sizeOf(Inlined)); - } - - pub fn empty() SmolStr { - const inlined = Inlined{ - .data = 0, - .__len = 0, - ._tag = 1, - }; - return SmolStr.fromInlined(inlined); - } - - pub fn len(this: *const SmolStr) u32 { - if (this.isInlined()) { - return @intCast((@intFromPtr(this.__ptr) >> 56) & 0b01111111); - } - - return this.__len; - } - - pub fn ptr(this: *SmolStr) [*]u8 { - return @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) & NegatedTag); - } - - pub fn ptrConst(this: *const SmolStr) [*]const u8 { - return @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) & NegatedTag); - } - - pub fn markInlined(this: *SmolStr) void { - this.__ptr = @ptrFromInt(@as(usize, @intFromPtr(this.__ptr)) | Tag); - } - - pub fn markHeap(this: *SmolStr) void { - this.__ptr = @ptrFromInt(@as(usize, 
@intFromPtr(this.__ptr)) & NegatedTag); - } - - pub fn isInlined(this: *const SmolStr) bool { - return @as(usize, @intFromPtr(this.__ptr)) & Tag != 0; - } - - pub fn toInlined(this: *const SmolStr) Inlined { - var inlined: Inlined = @bitCast(@as(u128, @bitCast(this.*))); - inlined._tag = 1; - return inlined; - } - - pub fn fromBabyList(baby_list: BabyList(u8)) SmolStr { - var smol_str: SmolStr = .{ - .__len = baby_list.len, - .cap = baby_list.cap, - .__ptr = baby_list.ptr, - }; - smol_str.markHeap(); - return smol_str; - } - - pub fn fromInlined(inlined: Inlined) SmolStr { - var smol_str: SmolStr = @bitCast(inlined); - smol_str.markInlined(); - return smol_str; - } - - pub fn fromChar(char: u8) SmolStr { - var inlined = Inlined{ - .data = 0, - .__len = 1, - ._tag = 1, - }; - - inlined.allChars()[0] = char; - inlined.setLen(1); - - return SmolStr.fromInlined(inlined); - } - - pub fn fromSlice(allocator: std.mem.Allocator, values: []const u8) !SmolStr { - if (values.len > 15) { - var baby_list = try BabyList(u8).initCapacity(allocator, values.len); - baby_list.appendSliceAssumeCapacity(values); - return SmolStr.fromBabyList(baby_list); - } - - var inlined = Inlined{ - .data = 0, - .__len = 0, - ._tag = 1, - }; - - if (values.len > 0) { - @memcpy(inlined.allChars()[0..values.len], values[0..values.len]); - inlined.setLen(@intCast(values.len)); - } - - return SmolStr.fromInlined(inlined); - } - - pub fn slice(this: *const SmolStr) []const u8 { - if (this.isInlined()) { - const bytes: [*]const u8 = @ptrCast(this); - return bytes[0..this.len()]; - } - return this.ptrConst()[0..this.__len]; - } - - pub fn appendChar(this: *SmolStr, allocator: std.mem.Allocator, char: u8) !void { - if (this.isInlined()) { - var inlined = this.toInlined(); - if (inlined.len() + 1 > 15) { - var baby_list = try BabyList(u8).initCapacity(allocator, inlined.len() + 1); - baby_list.appendSliceAssumeCapacity(inlined.slice()); - try baby_list.push(allocator, char); - // this.* = 
SmolStr.fromBabyList(baby_list); - this.__len = baby_list.len; - this.__ptr = baby_list.ptr; - this.cap = baby_list.cap; - this.markHeap(); - return; - } - inlined.allChars()[inlined.len()] = char; - inlined.setLen(@intCast(inlined.len() + 1)); - // this.* = SmolStr.fromInlined(inlined); - this.* = @bitCast(inlined); - this.markInlined(); - return; - } - - var baby_list = BabyList(u8){ - .ptr = this.ptr(), - .len = this.__len, - .cap = this.cap, - }; - try baby_list.push(allocator, char); - - // this.* = SmolStr.fromBabyList(baby_list); - this.__len = baby_list.len; - this.__ptr = baby_list.ptr; - this.cap = baby_list.cap; - return; - } - - pub fn appendSlice(this: *SmolStr, allocator: std.mem.Allocator, values: []const u8) !void { - if (this.isInlined()) { - var inlined = this.toInlined(); - if (inlined.len() + values.len > 15) { - var baby_list = try BabyList(u8).initCapacity(allocator, inlined.len() + values.len); - baby_list.appendSliceAssumeCapacity(inlined.slice()); - baby_list.appendSliceAssumeCapacity(values); - this.* = SmolStr.fromBabyList(baby_list); - return; - } - @memcpy(inlined.allChars()[inlined.len() .. inlined.len() + values.len], values); - inlined.setLen(@intCast(inlined.len() + values.len)); - this.* = SmolStr.fromInlined(inlined); - return; - } - - var baby_list = BabyList(u8){ - .ptr = this.ptr(), - .len = this.__len, - .cap = this.cap, - }; - try baby_list.append(allocator, values); - - this.* = SmolStr.fromBabyList(baby_list); - return; - } -};