Compare commits

...

3 Commits

Author SHA1 Message Date
Jarred Sumner
534eeb85e7 Update string_immutable.zig 2025-01-11 18:31:42 -08:00
Jarred Sumner
8b3cf6f777 windows 2025-01-11 18:28:15 -08:00
Jarred Sumner
0f2309c507 Slightly reduce memory usage of IPC 2025-01-11 18:23:20 -08:00
10 changed files with 141 additions and 160 deletions

View File

@@ -338,9 +338,13 @@ pub fn BabyList(comptime Type: type) type {
if (comptime Type != u8)
@compileError("Unsupported for type " ++ @typeName(Type));
const initial = this.len;
const old = this.listManaged(allocator);
const new = try strings.allocateLatin1IntoUTF8WithList(old, old.items.len, []const u8, str);
this.update(new);
var list_ = this.listManaged(allocator);
{
defer this.update(list_);
try strings.allocateLatin1IntoUTF8WithList(&list_, list_.items.len, []const u8, str);
}
return this.len - initial;
}

View File

@@ -484,10 +484,12 @@ pub const ZigString = extern struct {
return try allocator.dupeZ(u8, this.slice());
var list = std.ArrayList(u8).init(allocator);
list = if (this.is16Bit())
try strings.toUTF8ListWithType(list, []const u16, this.utf16SliceAligned())
errdefer list.deinit();
if (this.is16Bit())
try strings.toUTF8ListWithType(&list, []const u16, this.utf16SliceAligned())
else
try strings.allocateLatin1IntoUTF8WithList(list, 0, []const u8, this.slice());
try strings.allocateLatin1IntoUTF8WithList(&list, 0, []const u8, this.slice());
if (list.capacity > list.items.len) {
list.items.ptr[list.items.len] = 0;
@@ -501,10 +503,12 @@ pub const ZigString = extern struct {
return allocator.dupeZ(u8, this.slice());
var list = std.ArrayList(u8).init(allocator);
list = if (this.is16Bit())
try strings.toUTF8ListWithType(list, []const u16, this.utf16SliceAligned())
errdefer list.deinit();
if (this.is16Bit())
try strings.toUTF8ListWithType(&list, []const u16, this.utf16SliceAligned())
else
try strings.allocateLatin1IntoUTF8WithList(list, 0, []const u8, this.slice());
try strings.allocateLatin1IntoUTF8WithList(&list, 0, []const u8, this.slice());
try list.append(0);
return list.items[0 .. list.items.len - 1 :0];

View File

@@ -161,7 +161,7 @@ const advanced = struct {
return payload_length;
}
pub fn serializeInternal(_: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, value: JSValue) !usize {
pub fn serializeInternal(_: *IPCData, writer: *bun.io.StreamBuffer, global: *JSC.JSGlobalObject, value: JSValue) !usize {
const serialized = value.serialize(global) orelse
return IPCSerializationError.SerializationFailed;
defer serialized.deinit();
@@ -246,48 +246,33 @@ const json = struct {
return IPCDecodeError.NotEnoughBytes;
}
/// JSON-mode serializer: stringifies `value` with `jsonStringify` and appends one
/// message to `writer`, framed as a single `1` byte, the JSON text, then "\n".
/// Returns `IPCSerializationError.SerializationFailed` when the stringified
/// result is tagged `.Dead`; otherwise returns the number of bytes the message
/// occupies in `writer`.
pub fn serialize(_: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, value: JSValue) !usize {
pub fn serialize(_: *IPCData, writer: *bun.io.StreamBuffer, global: *JSC.JSGlobalObject, value: JSValue) !usize {
var out: bun.String = undefined;
value.jsonStringify(global, 0, &out);
defer out.deref();
if (out.tag == .Dead) return IPCSerializationError.SerializationFailed;
// TODO: it would be cool to have a 'toUTF8Into' which can write directly into 'ipc_data.outgoing.list'
const str = out.toUTF8(bun.default_allocator);
defer str.deinit();
// Remember the current length so the return value can be computed as a delta.
const initial = writer.list.items.len;
// NOTE(review): the three appends below are separate fallible calls; if a later
// one fails, the bytes already appended stay in `writer` — confirm callers
// tolerate a partial message.
try writer.write(&.{1});
try writer.writeString(out);
try writer.write("\n");
const slice = str.slice();
try writer.ensureUnusedCapacity(1 + slice.len + 1);
writer.writeAssumeCapacity(&.{1});
writer.writeAssumeCapacity(slice);
writer.writeAssumeCapacity("\n");
return 1 + slice.len + 1;
return writer.list.items.len - initial;
}
/// JSON-mode serializer for internal messages: same framing as `serialize`, but
/// the leading tag byte is `2` instead of `1` (tag byte, JSON text, "\n").
/// Returns `IPCSerializationError.SerializationFailed` when the stringified
/// result is tagged `.Dead`; otherwise returns the number of bytes the message
/// occupies in `writer`.
pub fn serializeInternal(_: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, value: JSValue) !usize {
pub fn serializeInternal(_: *IPCData, writer: *bun.io.StreamBuffer, global: *JSC.JSGlobalObject, value: JSValue) !usize {
var out: bun.String = undefined;
value.jsonStringify(global, 0, &out);
defer out.deref();
if (out.tag == .Dead) return IPCSerializationError.SerializationFailed;
// Remember the current length so the return value can be computed as a delta.
const initial = writer.list.items.len;
// NOTE(review): the three appends below are separate fallible calls; if a later
// one fails, the bytes already appended stay in `writer` — confirm callers
// tolerate a partial message.
try writer.write(&.{2});
try writer.writeString(out);
try writer.write("\n");
// TODO: it would be cool to have a 'toUTF8Into' which can write directly into 'ipc_data.outgoing.list'
const str = out.toUTF8(bun.default_allocator);
defer str.deinit();
const slice = str.slice();
try writer.ensureUnusedCapacity(1 + slice.len + 1);
writer.writeAssumeCapacity(&.{2});
writer.writeAssumeCapacity(slice);
writer.writeAssumeCapacity("\n");
return 1 + slice.len + 1;
return writer.list.items.len - initial;
}
};
@@ -307,7 +292,7 @@ pub fn getVersionPacket(mode: Mode) []const u8 {
/// Serialize `value` via the `serialize` implementation selected by `data.mode` and append it to `writer`.
/// Returns the `usize` reported by that implementation (in `json` mode, the number of bytes appended).
pub fn serialize(data: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, value: JSValue) !usize {
pub fn serialize(data: *IPCData, writer: *bun.io.StreamBuffer, global: *JSC.JSGlobalObject, value: JSValue) !usize {
return switch (data.mode) {
inline else => |t| @field(@This(), @tagName(t)).serialize(data, writer, global, value),
};
@@ -315,7 +300,7 @@ pub fn serialize(data: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, v
/// Serialize `value` via the `serializeInternal` implementation selected by `data.mode` and append it to `writer`.
/// Returns the `usize` reported by that implementation (in `json` mode, the number of bytes appended).
pub fn serializeInternal(data: *IPCData, writer: anytype, global: *JSC.JSGlobalObject, value: JSValue) !usize {
pub fn serializeInternal(data: *IPCData, writer: *bun.io.StreamBuffer, global: *JSC.JSGlobalObject, value: JSValue) !usize {
return switch (data.mode) {
inline else => |t| @field(@This(), @tagName(t)).serializeInternal(data, writer, global, value),
};

View File

@@ -1053,6 +1053,10 @@ pub const StreamBuffer = struct {
return this.size() > 0;
}
/// Appends `str` to `this.list` by delegating to `str.writeUTF8Into`.
/// Any error from that call is returned to the caller.
pub fn writeString(this: *StreamBuffer, str: bun.String) !void {
    return str.writeUTF8Into(&this.list);
}
/// Appends the bytes of `buffer` to the end of `this.list`.
/// Any error from `appendSlice` is returned to the caller.
pub fn write(this: *StreamBuffer, buffer: []const u8) !void {
    try this.list.appendSlice(buffer);
}

View File

@@ -358,11 +358,12 @@ pub const Archiver = struct {
if (comptime ContextType != void and @hasDecl(std.meta.Child(ContextType), "onFirstDirectoryName")) {
if (appender.needs_first_dirname) {
if (comptime Environment.isWindows) {
const list = std.ArrayList(u8).init(default_allocator);
var result = try strings.toUTF8ListWithType(list, []const u16, pathname[0..pathname.len]);
var list = std.ArrayList(u8).init(default_allocator);
defer list.deinit();
try strings.toUTF8ListWithType(&list, []const u16, pathname[0..pathname.len]);
// onFirstDirectoryName copies the contents of pathname to another buffer, safe to free
defer result.deinit();
appender.onFirstDirectoryName(strings.withoutTrailingSlash(result.items));
appender.onFirstDirectoryName(strings.withoutTrailingSlash(list.items));
} else {
appender.onFirstDirectoryName(strings.withoutTrailingSlash(bun.asByteSlice(pathname)));
}

View File

@@ -3061,26 +3061,7 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
/// Appends `bunstr` to `self.strpool` and then advances `self.j` by the number
/// of bytes the append added to the pool (`end - start`).
fn appendStringToStrPool(self: *@This(), bunstr: bun.String) !void {
// Pool length before the append; used to compute how many bytes were added.
const start = self.strpool.items.len;
if (bunstr.isUTF16()) {
const utf16 = bunstr.utf16();
const additional = bun.simdutf.simdutf__utf8_length_from_utf16le(utf16.ptr, utf16.len);
try self.strpool.ensureUnusedCapacity(additional);
try bun.strings.convertUTF16ToUTF8Append(&self.strpool, bunstr.utf16());
} else if (bunstr.isUTF8()) {
try self.strpool.appendSlice(bunstr.byteSlice());
} else if (bunstr.is8Bit()) {
if (isAllAscii(bunstr.byteSlice())) {
try self.strpool.appendSlice(bunstr.byteSlice());
} else {
const bytes = bunstr.byteSlice();
const non_ascii_idx = bun.strings.firstNonASCII(bytes) orelse 0;
if (non_ascii_idx > 0) {
try self.strpool.appendSlice(bytes[0..non_ascii_idx]);
}
self.strpool = try bun.strings.allocateLatin1IntoUTF8WithList(self.strpool, self.strpool.items.len, []const u8, bytes[non_ascii_idx..]);
}
}
try bunstr.writeUTF8Into(&self.strpool);
const end = self.strpool.items.len;
self.j += @intCast(end - start);
}
@@ -3926,15 +3907,8 @@ pub const ShellSrcBuilder = struct {
return true;
}
}
if (bunstr.isUTF16()) {
try this.appendUTF16Impl(bunstr.utf16());
return true;
}
if (bunstr.isUTF8() or bun.strings.isAllASCII(bunstr.byteSlice())) {
try this.appendUTF8Impl(bunstr.byteSlice());
return true;
}
try this.appendLatin1Impl(bunstr.byteSlice());
try bunstr.writeUTF8Into(this.outbuf);
return true;
}
@@ -3971,7 +3945,7 @@ pub const ShellSrcBuilder = struct {
try this.appendUTF8Impl(latin1[0..non_ascii_idx]);
}
this.outbuf.* = try bun.strings.allocateLatin1IntoUTF8WithList(this.outbuf.*, this.outbuf.items.len, []const u8, latin1);
try bun.strings.allocateLatin1IntoUTF8WithList(this.outbuf, this.outbuf.items.len, []const u8, latin1);
}
pub fn appendJSStrRef(this: *ShellSrcBuilder, bunstr: bun.String) bun.OOM!void {

View File

@@ -961,6 +961,23 @@ pub const String = extern struct {
}
}
/// Appends this string's contents to `out`, choosing the path by representation:
/// - empty: nothing is appended;
/// - UTF-8: the bytes are appended unchanged;
/// - 8-bit (Latin-1): handed to `bun.strings.allocateLatin1IntoUTF8WithList`,
///   starting at the current end of `out`;
/// - otherwise: the UTF-16 code units are handed to `bun.strings.toUTF8AppendToList`.
/// Errors from the append/convert calls are returned to the caller.
pub fn writeUTF8Into(this: String, out: *std.ArrayList(u8)) !void {
    if (this.isEmpty()) return;

    if (this.isUTF8()) {
        try out.appendSlice(this.utf8());
    } else if (this.is8Bit()) {
        const write_offset = out.items.len;
        try bun.strings.allocateLatin1IntoUTF8WithList(out, write_offset, []const u8, this.latin1());
    } else {
        try bun.strings.toUTF8AppendToList(out, this.utf16());
    }
}
pub fn toUTF8(this: String, allocator: std.mem.Allocator) ZigString.Slice {
if (this.tag == .WTFStringImpl) {
return this.value.WTFStringImpl.toUTF8(allocator);

View File

@@ -2079,33 +2079,18 @@ pub fn toPathMaybeDir(buf: []u8, utf8: []const u8, comptime add_trailing_lash: b
return buf[0..len :0];
}
pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
var list = list_;
pub fn convertUTF16ToUTF8(list: *std.ArrayList(u8), comptime Type: type, utf16: Type) !void {
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
utf16,
list.items.ptr[0..list.capacity],
list.items.ptr[list.items.len..list.capacity],
);
if (result.status == .surrogate) {
// Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
return toUTF8ListWithTypeBun(&list, Type, utf16, false);
list.* = try toUTF8ListWithTypeBun(list, Type, utf16, false);
return;
}
list.items.len = result.count;
return list;
}
pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
var list = list_;
const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
utf16,
list.items.ptr[0..list.capacity],
);
if (result.status == .surrogate) {
return error.SurrogatePair;
}
list.items.len = result.count;
return list;
list.items.len += result.count;
}
pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !void {
@@ -2123,40 +2108,30 @@ pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !v
list.items.len += result.count;
}
pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
// add 16 bytes of padding for SIMDUTF
var list = try std.ArrayList(u8).initCapacity(allocator, length + 16);
list = try convertUTF16ToUTF8(list, Type, utf16);
return list.items;
}
var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len);
list = try toUTF8ListWithType(list, Type, utf16);
return list.items;
}
// These do the same thing.
pub const toUTF8AllocWithTypeWithoutInvalidSurrogatePairs = toUTF8AllocWithType;
/// Converts `utf16` to UTF-8 in a buffer allocated from `allocator` and returns
/// the converted bytes.
///
/// If the conversion fails, the buffer is freed before the error is returned.
/// NOTE(review): the returned slice is `list.items`, so the backing allocation
/// can be larger than the slice (the simdutf path reserves 16 extra bytes) —
/// confirm callers free it with an allocator that tolerates that.
pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 {
    if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
        const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
        // add 16 bytes of padding for SIMDUTF
        var list = try std.ArrayList(u8).initCapacity(allocator, length + 16);
        errdefer list.deinit();
        try convertUTF16ToUTF8(&list, Type, utf16);
        return list.items;
    }

    var list = std.ArrayList(u8).init(allocator);
    errdefer list.deinit();
    // `toUTF8ListWithType` fills `list` through the pointer and returns `!void`,
    // so its result must not be assigned back to `list`.
    try toUTF8ListWithType(&list, Type, utf16);
    return list.items;
}
pub fn toUTF8ListWithType(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
pub fn toUTF8ListWithType(list: *std.ArrayList(u8), comptime Type: type, utf16: Type) !void {
if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) {
var list = list_;
const length = bun.simdutf.length.utf8.from.utf16.le(utf16);
try list.ensureTotalCapacityPrecise(length + 16);
const buf = try convertUTF16ToUTF8(list, Type, utf16);
try convertUTF16ToUTF8(list, Type, utf16);
// Commenting out because `convertUTF16ToUTF8` may convert to WTF-8
// which uses 3 bytes for invalid surrogates, causing the length to not
@@ -2164,8 +2139,7 @@ pub fn toUTF8ListWithType(list_: std.ArrayList(u8), comptime Type: type, utf16:
// if (Environment.allow_assert) {
// bun.unsafeAssert(buf.items.len == length);
// }
return buf;
return;
}
@compileError("not implemented");
@@ -2187,8 +2161,10 @@ pub fn toUTF8FromLatin1(allocator: std.mem.Allocator, latin1: []const u8) !?std.
if (isAllASCII(latin1))
return null;
const list = try std.ArrayList(u8).initCapacity(allocator, latin1.len);
return try allocateLatin1IntoUTF8WithList(list, 0, []const u8, latin1);
var list = try std.ArrayList(u8).initCapacity(allocator, latin1.len);
errdefer list.deinit();
try allocateLatin1IntoUTF8WithList(&list, 0, []const u8, latin1);
return list;
}
pub fn toUTF8FromLatin1Z(allocator: std.mem.Allocator, latin1: []const u8) !?std.ArrayList(u8) {
@@ -2198,10 +2174,11 @@ pub fn toUTF8FromLatin1Z(allocator: std.mem.Allocator, latin1: []const u8) !?std
if (isAllASCII(latin1))
return null;
const list = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 1);
var list1 = try allocateLatin1IntoUTF8WithList(list, 0, []const u8, latin1);
try list1.append(0);
return list1;
var list = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 1);
errdefer list.deinit();
try allocateLatin1IntoUTF8WithList(&list, 0, []const u8, latin1);
try list.append(0);
return list;
}
pub fn toUTF8ListWithTypeBun(list: *std.ArrayList(u8), comptime Type: type, utf16: Type, comptime skip_trailing_replacement: bool) !(if (skip_trailing_replacement) ?u16 else std.ArrayList(u8)) {
@@ -2269,15 +2246,21 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
return out;
}
const list = try std.ArrayList(u8).initCapacity(allocator, latin1_.len);
var foo = try allocateLatin1IntoUTF8WithList(list, 0, Type, latin1_);
return try foo.toOwnedSlice();
var list = try std.ArrayList(u8).initCapacity(allocator, latin1_.len);
errdefer list.deinit();
try allocateLatin1IntoUTF8WithList(&list, 0, Type, latin1_);
// Large reallocations are expensive and may cause more heap fragmentation.
if (list.items.len > 64 and list.items.len + 64 > list.capacity) {
return list.items;
}
return try list.toOwnedSlice();
}
pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list: usize, comptime Type: type, latin1_: Type) !std.ArrayList(u8) {
pub fn allocateLatin1IntoUTF8WithList(list: *std.ArrayList(u8), offset_into_list: usize, comptime Type: type, latin1_: Type) !void {
var latin1 = latin1_;
var i: usize = offset_into_list;
var list = list_;
try list.ensureUnusedCapacity(latin1.len);
while (latin1.len > 0) {
@@ -2388,8 +2371,6 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
}
log("Latin1 {d} -> UTF8 {d}", .{ latin1_.len, i });
return list;
}
pub const UTF16Replacement = struct {

View File

@@ -1 +1 @@
process.send("hello");
process.send(process.argv.at(-1));

View File

@@ -3,38 +3,49 @@ import { describe, expect, it } from "bun:test";
import { bunExe, gcTick } from "harness";
import path from "path";
describe.each(["advanced", "json"])("ipc mode %s", mode => {
it("the subprocess should be defined and the child should send", done => {
gcTick();
const returned_subprocess = spawn([bunExe(), path.join(__dirname, "bun-ipc-child.js")], {
ipc: (message, subProcess) => {
expect(subProcess).toBe(returned_subprocess);
expect(message).toBe("hello");
subProcess.kill();
done();
const messages = [
"ASCII",
// latin1
String.fromCharCode(...("Copyright " + String.fromCharCode(0x00a9) + " 2025").split("").map(a => a.charCodeAt(0))),
// UTF-16
"🌟 Hello from the emoji! ✨",
];
for (const message of messages) {
describe(JSON.stringify(message), () => {
describe.each(["advanced", "json"])("ipc mode %s", mode => {
it("the subprocess should be defined and the child should send", done => {
gcTick();
},
stdio: ["inherit", "inherit", "inherit"],
serialization: mode,
const returned_subprocess = spawn([bunExe(), path.join(__dirname, "bun-ipc-child.js"), message], {
ipc: (reply, subProcess) => {
expect(subProcess).toBe(returned_subprocess);
expect(reply).toBe(message);
subProcess.kill();
done();
gcTick();
},
stdio: ["inherit", "inherit", "inherit"],
serialization: mode,
});
});
it("the subprocess should receive the parent message and respond back", done => {
gcTick();
const childProc = spawn([bunExe(), path.join(__dirname, "bun-ipc-child-respond.js")], {
ipc: (reply, subProcess) => {
expect(reply).toBe(`pong:${message}`);
subProcess.kill();
done();
gcTick();
},
stdio: ["inherit", "inherit", "inherit"],
serialization: mode,
});
childProc.send(message);
gcTick();
});
});
});
it("the subprocess should receive the parent message and respond back", done => {
gcTick();
const parentMessage = "I am your father";
const childProc = spawn([bunExe(), path.join(__dirname, "bun-ipc-child-respond.js")], {
ipc: (message, subProcess) => {
expect(message).toBe(`pong:${parentMessage}`);
subProcess.kill();
done();
gcTick();
},
stdio: ["inherit", "inherit", "inherit"],
serialization: mode,
});
childProc.send(parentMessage);
gcTick();
});
});
}