From a2f1a87f0d7ca2f18fe444730643183d83ab81fd Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Wed, 15 Jan 2025 23:45:33 -0800 Subject: [PATCH] Fix under-reporting string memory usage to GC (#16426) --- src/bake/DevServer.zig | 8 ++--- src/bun.js/api/BunObject.zig | 4 +-- src/bun.js/api/html_rewriter.zig | 4 +-- src/bun.js/api/server.zig | 4 +-- src/bun.js/base.zig | 4 +-- src/bun.js/bindings/BunString.cpp | 54 ++++++++++++++++++++++++++++++- src/bun.js/bindings/bindings.zig | 4 +++ src/bun.js/node/types.zig | 35 +++----------------- src/bun.js/test/expect.zig | 4 +-- src/bun.js/webcore.zig | 5 ++- src/bun.js/webcore/S3File.zig | 3 +- src/js_ast.zig | 9 ++---- src/napi/napi.zig | 15 ++++----- src/string.zig | 40 ++++++++++++++++++----- 14 files changed, 113 insertions(+), 80 deletions(-) diff --git a/src/bake/DevServer.zig b/src/bake/DevServer.zig index 096220b9c7..b3e4ccd353 100644 --- a/src/bake/DevServer.zig +++ b/src/bake/DevServer.zig @@ -761,11 +761,9 @@ fn onRequestWithBundle( // routerTypeMain router_type.server_file_string.get() orelse str: { const name = dev.server_graph.bundled_files.keys()[fromOpaqueFileId(.server, router_type.server_file).get()]; - const str = bun.String.createUTF8(dev.relativePath(name)); - defer str.deref(); - const js = str.toJS(dev.vm.global); - router_type.server_file_string = JSC.Strong.create(js, dev.vm.global); - break :str js; + const str = bun.String.createUTF8ForJS(dev.vm.global, dev.relativePath(name)); + router_type.server_file_string = JSC.Strong.create(str, dev.vm.global); + break :str str; }, // routeModules route_bundle.cached_module_list.get() orelse arr: { diff --git a/src/bun.js/api/BunObject.zig b/src/bun.js/api/BunObject.zig index 5049757365..d542813aa1 100644 --- a/src/bun.js/api/BunObject.zig +++ b/src/bun.js/api/BunObject.zig @@ -4599,9 +4599,7 @@ const InternalTestingAPIs = struct { return globalThis.throwError(err, "Error formatting code"); }; - var str = bun.String.createUTF8(buffer.list.items); - defer str.deref(); - return str.toJS(globalThis); + return bun.String.createUTF8ForJS(globalThis, buffer.list.items); } }; diff --git a/src/bun.js/api/html_rewriter.zig b/src/bun.js/api/html_rewriter.zig index ced09dd55e..4ab5a5c9b1 100644 --- a/src/bun.js/api/html_rewriter.zig +++ b/src/bun.js/api/html_rewriter.zig @@ -1871,9 +1871,7 @@ pub const Element = struct { ) JSValue { if (this.element == null) return JSValue.jsUndefined(); - var str = bun.String.createUTF8(std.mem.span(this.element.?.namespaceURI())); - defer str.deref(); - return str.toJS(globalObject); + return bun.String.createUTF8ForJS(globalObject, std.mem.span(this.element.?.namespaceURI())); } pub fn getAttributes( diff --git a/src/bun.js/api/server.zig b/src/bun.js/api/server.zig index b16ab3fe98..764812ec79 100644 --- a/src/bun.js/api/server.zig +++ b/src/bun.js/api/server.zig @@ -6366,9 +6366,7 @@ pub fn NewServer(comptime NamespaceType: type, comptime ssl_enabled_: bool, comp this: *ThisServer, globalThis: *JSC.JSGlobalObject, ) JSC.JSValue { - var str = bun.String.createUTF8(this.config.id); - defer str.deref(); - return str.toJS(globalThis); + return bun.String.createUTF8ForJS(globalThis, this.config.id); } pub fn getPendingRequests( diff --git a/src/bun.js/base.zig b/src/bun.js/base.zig index 7bc75dc56a..a81413f87b 100644 --- a/src/bun.js/base.zig +++ b/src/bun.js/base.zig @@ -59,9 +59,7 @@ pub fn toJS(globalObject: *JSC.JSGlobalObject, comptime ValueType: type, value: bool => return JSC.JSValue.jsBoolean(if (comptime Type != ValueType) value.* else value), *JSC.JSGlobalObject => return value.toJSValue(), []const u8, [:0]const u8, [*:0]const u8, []u8, [:0]u8, [*:0]u8 => { - const str = bun.String.createUTF8(value); - defer str.deref(); - return str.toJS(globalObject); + return bun.String.createUTF8ForJS(globalObject, value); }, []const bun.String => { defer { diff --git a/src/bun.js/bindings/BunString.cpp b/src/bun.js/bindings/BunString.cpp index 3190415941..0657074dd5 100644 --- a/src/bun.js/bindings/BunString.cpp +++ b/src/bun.js/bindings/BunString.cpp @@ -1,4 +1,5 @@ + #include "helpers.h" #include "root.h" #include "headers-handwritten.h" @@ -34,6 +35,7 @@ #include "JSDOMOperation.h" #include "GCDefferalContext.h" +#include "wtf/StdLibExtras.h" #include "wtf/text/StringImpl.h" #include "wtf/text/StringToIntegerConversion.h" @@ -92,6 +94,52 @@ extern "C" BunString BunString__tryCreateAtom(const char* bytes, size_t length) return { BunStringTag::Dead, {} }; } +extern "C" JSC::EncodedJSValue BunString__createUTF8ForJS(JSC::JSGlobalObject* globalObject, const char* ptr, size_t length) +{ + auto& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + if (simdutf::validate_ascii(ptr, length)) { + std::span destination; + + auto impl = WTF::StringImpl::tryCreateUninitialized(length, destination); + if (UNLIKELY(!impl)) { + throwOutOfMemoryError(globalObject, scope); + return JSC::EncodedJSValue(); + } + std::span source = { reinterpret_cast(ptr), length }; + WTF::memcpySpan(destination, source); + return JSValue::encode(jsString(vm, String(WTFMove(impl)))); + } + + auto str = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { reinterpret_cast(ptr), length }); + if (UNLIKELY(str.isNull())) { + throwOutOfMemoryError(globalObject, scope); + return JSC::EncodedJSValue(); + } + return JSValue::encode(jsString(vm, str)); +} + +extern "C" JSC::EncodedJSValue BunString__transferToJS(BunString* bunString, JSC::JSGlobalObject* globalObject) +{ + auto& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + if (UNLIKELY(bunString->tag == BunStringTag::Empty || bunString->tag == BunStringTag::Dead)) { + return JSValue::encode(JSC::jsEmptyString(vm)); + } + + if (LIKELY(bunString->tag == BunStringTag::WTFStringImpl)) { + auto str = bunString->toWTFString(); + bunString->impl.wtf->deref(); + *bunString = { .tag = BunStringTag::Dead }; + return JSValue::encode(jsString(vm, WTFMove(str))); + } + + WTF::String str = bunString->toWTFString(); + *bunString = { .tag = BunStringTag::Dead }; + return JSValue::encode(jsString(vm, WTFMove(str))); +} + // int64_t max to say "not a number" extern "C" int64_t BunString__toInt32(BunString* bunString) { @@ -115,7 +163,11 @@ JSC::JSValue toJS(JSC::JSGlobalObject* globalObject, BunString bunString) return JSValue(JSC::jsEmptyString(globalObject->vm())); } if (bunString.tag == BunStringTag::WTFStringImpl) { - ASSERT(bunString.impl.wtf->refCount() > 0 && !bunString.impl.wtf->isEmpty()); +#if ASSERT_ENABLED + unsigned refCount = bunString.impl.wtf->refCount(); + ASSERT(refCount > 0 && !bunString.impl.wtf->isEmpty()); +#endif + return JSValue(jsString(globalObject->vm(), String(bunString.impl.wtf))); } diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig index 8f74b97657..3389c2f85f 100644 --- a/src/bun.js/bindings/bindings.zig +++ b/src/bun.js/bindings/bindings.zig @@ -574,6 +574,10 @@ pub const ZigString = extern struct { } } + pub fn isWTFAllocated(this: *const Slice) bool { + return bun.String.isWTFAllocator(this.allocator.get() orelse return false); + } + pub fn init(allocator: std.mem.Allocator, input: []const u8) Slice { return .{ .ptr = input.ptr, diff --git a/src/bun.js/node/types.zig b/src/bun.js/node/types.zig index c3d84429e3..66d1bad3a7 100644 --- a/src/bun.js/node/types.zig +++ b/src/bun.js/node/types.zig @@ -494,12 +494,7 @@ pub const StringOrBuffer = union(enum) { pub fn toJS(this: *StringOrBuffer, ctx: JSC.C.JSContextRef) JSC.JSValue { return switch (this.*) { inline .threadsafe_string, .string => |*str| { - defer { - str.deinit(); - str.* = .{}; - } - - return str.toJS(ctx); + return str.transferToJS(ctx); }, .encoded_slice => { defer { @@ -507,9 +502,7 @@ pub const StringOrBuffer = union(enum) { this.encoded_slice = .{}; } - const str = bun.String.createUTF8(this.encoded_slice.slice()); - defer str.deref(); - return str.toJS(ctx); + return bun.String.createUTF8ForJS(ctx, this.encoded_slice.slice()); }, .buffer => { if (this.buffer.buffer.value != .zero) { @@ -769,10 +762,9 @@ pub const Encoding = enum(u8) { .base64 => { var base64_buf: [std.base64.standard.Encoder.calcSize(max_size * 4)]u8 = undefined; const encoded_len = bun.base64.encode(&base64_buf, input); - const encoded, const bytes = bun.String.createUninitialized(.latin1, encoded_len); - defer encoded.deref(); + var encoded, const bytes = bun.String.createUninitialized(.latin1, encoded_len); @memcpy(@constCast(bytes), base64_buf[0..encoded_len]); - return encoded.toJS(globalObject); + return encoded.transferToJS(globalObject); }, .base64url => { var buf: [std.base64.url_safe_no_pad.Encoder.calcSize(max_size * 4)]u8 = undefined; @@ -926,25 +918,6 @@ pub const PathLike = union(enum) { return sliceZWithForceCopy(this, buf, false); } - pub fn toJS(this: *const PathLike, globalObject: *JSC.JSGlobalObject) JSC.JSValue { - return switch (this.*) { - .string => this.string.toJS(globalObject, null), - .buffer => this.buffer.toJS(globalObject), - inline .threadsafe_string, .slice_with_underlying_string => |*str| str.toJS(globalObject), - .encoded_slice => |encoded| { - if (this.encoded_slice.allocator.get()) |allocator| { - // Is this a globally-allocated slice? - if (allocator.vtable == bun.default_allocator.vtable) {} - } - - const str = bun.String.createUTF8(encoded.slice()); - defer str.deref(); - return str.toJS(globalObject); - }, - else => unreachable, - }; - } - pub fn fromJS(ctx: JSC.C.JSContextRef, arguments: *ArgumentsSlice) bun.JSError!?PathLike { return fromJSWithAllocator(ctx, arguments, bun.default_allocator); } diff --git a/src/bun.js/test/expect.zig b/src/bun.js/test/expect.zig index f09abb158f..0136fd9ca0 100644 --- a/src/bun.js/test/expect.zig +++ b/src/bun.js/test/expect.zig @@ -5422,9 +5422,7 @@ pub const ExpectMatcherUtils = struct { try buffered_writer.flush(); - const str = bun.String.createUTF8(mutable_string.toOwnedSlice()); - defer str.deref(); - return str.toJS(globalThis); + return bun.String.createUTF8ForJS(globalThis, mutable_string.toOwnedSlice()); } inline fn printValueCatched(globalThis: *JSGlobalObject, value: JSValue, comptime color_or_null: ?[]const u8) JSValue { diff --git a/src/bun.js/webcore.zig b/src/bun.js/webcore.zig index cf53ac74c8..78b9346228 100644 --- a/src/bun.js/webcore.zig +++ b/src/bun.js/webcore.zig @@ -634,13 +634,12 @@ pub const Crypto = struct { globalThis: *JSC.JSGlobalObject, _: *JSC.CallFrame, ) bun.JSError!JSC.JSValue { - const str, var bytes = bun.String.createUninitialized(.latin1, 36); - defer str.deref(); + var str, var bytes = bun.String.createUninitialized(.latin1, 36); const uuid = globalThis.bunVM().rareData().nextUUID(); uuid.print(bytes[0..36]); - return str.toJS(globalThis); + return str.transferToJS(globalThis); } comptime { diff --git a/src/bun.js/webcore/S3File.zig b/src/bun.js/webcore/S3File.zig index e6f34f97fe..7b02077244 100644 --- a/src/bun.js/webcore/S3File.zig +++ b/src/bun.js/webcore/S3File.zig @@ -501,8 +501,7 @@ pub fn getBucket( globalThis: *JSC.JSGlobalObject, ) callconv(JSC.conv) JSValue { if (getBucketName(this)) |name| { - var str = bun.String.createUTF8(name); - return str.transferToJS(globalThis); + return bun.String.createUTF8ForJS(globalThis, name); } return .undefined; } diff --git a/src/js_ast.zig b/src/js_ast.zig index 04d6466e7b..6ce22d81da 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -2504,20 +2504,17 @@ pub const E = struct { if (s.isUTF8()) { if (try strings.toUTF16Alloc(allocator, s.slice8(), false, false)) |utf16| { var out, const chars = bun.String.createUninitialized(.utf16, utf16.len); - defer out.deref(); @memcpy(chars, utf16); - return out.toJS(globalObject); + return out.transferToJS(globalObject); } else { var out, const chars = bun.String.createUninitialized(.latin1, s.slice8().len); - defer out.deref(); @memcpy(chars, s.slice8()); - return out.toJS(globalObject); + return out.transferToJS(globalObject); } } else { var out, const chars = bun.String.createUninitialized(.utf16, s.slice16().len); - defer out.deref(); @memcpy(chars, s.slice16()); - return out.toJS(globalObject); + return out.transferToJS(globalObject); } } diff --git a/src/napi/napi.zig b/src/napi/napi.zig index 6ec08af81d..ff738aa8a0 100644 --- a/src/napi/napi.zig +++ b/src/napi/napi.zig @@ -372,13 +372,12 @@ pub export fn napi_create_string_utf8(env: napi_env, str: ?[*]const u8, length: log("napi_create_string_utf8: {s}", .{slice}); - var string = bun.String.createUTF8(slice); - if (string.tag == .Dead) { - return env.genericFailure(); + const globalObject = env.toJS(); + const string = bun.String.createUTF8ForJS(globalObject, slice); + if (globalObject.hasException()) { + return env.setLastError(.pending_exception); } - - defer string.deref(); - result.set(env, string.toJS(env.toJS())); + result.set(env, string); return env.ok(); } pub export fn napi_create_string_utf16(env: napi_env, str: ?[*]const char16_t, length: usize, result_: ?*napi_value) napi_status { @@ -407,11 +406,9 @@ pub export fn napi_create_string_utf16(env: napi_env, str: ?[*]const char16_t, l } var string, const chars = bun.String.createUninitialized(.utf16, slice.len); - defer string.deref(); - @memcpy(chars, slice); - result.set(env, string.toJS(env.toJS())); + result.set(env, string.transferToJS(env.toJS())); return env.ok(); } pub extern fn napi_create_symbol(env: napi_env, description: napi_value, result: *napi_value) napi_status; diff --git a/src/string.zig b/src/string.zig index 183f796c8f..791ea81add 100644 --- a/src/string.zig +++ b/src/string.zig @@ -342,11 +342,10 @@ pub const String = extern struct { if (this.tag == .WTFStringImpl) this.value.WTFStringImpl.ensureHash(); } + extern fn BunString__transferToJS(this: *String, globalThis: *JSC.JSGlobalObject) JSC.JSValue; pub fn transferToJS(this: *String, globalThis: *JSC.JSGlobalObject) JSC.JSValue { - const js_value = this.toJS(globalThis); - this.deref(); - this.* = dead; - return js_value; + JSC.markBinding(@src()); + return BunString__transferToJS(this, globalThis); } pub fn toOwnedSlice(this: String, allocator: std.mem.Allocator) ![]u8 { @@ -1069,6 +1068,12 @@ pub const String = extern struct { extern fn Bun__parseDate(*JSC.JSGlobalObject, *String) f64; extern fn BunString__fromJSRef(globalObject: *JSC.JSGlobalObject, value: bun.JSC.JSValue, out: *String) bool; extern fn BunString__toWTFString(this: *String) void; + extern fn BunString__createUTF8ForJS(globalObject: *JSC.JSGlobalObject, ptr: [*]const u8, len: usize) JSC.JSValue; + + pub fn createUTF8ForJS(globalObject: *JSC.JSGlobalObject, utf8_slice: []const u8) JSC.JSValue { + JSC.markBinding(@src()); + return BunString__createUTF8ForJS(globalObject, utf8_slice.ptr, utf8_slice.len); + } pub fn parseDate(this: *String, globalObject: *JSC.JSGlobalObject) f64 { JSC.markBinding(@src()); @@ -1466,6 +1471,14 @@ pub const SliceWithUnderlyingString = struct { } pub fn toJS(this: *SliceWithUnderlyingString, globalObject: *JSC.JSGlobalObject) JSC.JSValue { + return this.toJSWithOptions(globalObject, false); + } + + pub fn transferToJS(this: *SliceWithUnderlyingString, globalObject: *JSC.JSGlobalObject) JSC.JSValue { + return this.toJSWithOptions(globalObject, true); + } + + fn toJSWithOptions(this: *SliceWithUnderlyingString, globalObject: *JSC.JSGlobalObject, transfer: bool) JSC.JSValue { if ((this.underlying.tag == .Dead or this.underlying.tag == .Empty) and this.utf8.length() > 0) { if (comptime bun.Environment.allow_assert) { if (this.utf8.allocator.get()) |allocator| { @@ -1487,12 +1500,23 @@ pub const SliceWithUnderlyingString = struct { } } - const out = bun.String.createUTF8(this.utf8.slice()); - defer out.deref(); - return out.toJS(globalObject); + defer { + if (transfer) { + this.utf8.deinit(); + this.utf8 = .{}; + } + } + + return String.createUTF8ForJS(globalObject, this.utf8.slice()); } - return this.underlying.toJS(globalObject); + if (transfer) { + this.utf8.deinit(); + this.utf8 = .{}; + return this.underlying.transferToJS(globalObject); + } else { + return this.underlying.toJS(globalObject); + } } };