apply changes

2026-02-17 06:12:08 +00:00 · 2025-07-28 21:56:23 -07:00
12 changed files with 175 additions and 50 deletions
--- a/src/bun.js/ConsoleObject.zig
+++ b/src/bun.js/ConsoleObject.zig
@@ -693,6 +693,7 @@ pub const FormatOptions = struct {
    single_line: bool = false,
    default_indent: u16 = 0,
    error_display_level: ErrorDisplayLevel = .full,
+    multiline_strings: bool = false,
    pub const ErrorDisplayLevel = enum {
        normal,
        warn,
@@ -824,6 +825,7 @@ pub fn format2(
            .stack_check = bun.StackCheck.init(),
            .can_throw_stack_overflow = true,
            .error_display_level = options.error_display_level,
+            .multiline_strings = options.multiline_strings,
        };
        defer fmt.deinit();
        const tag = try ConsoleObject.Formatter.Tag.get(vals[0], global);
@@ -995,6 +997,7 @@ pub const Formatter = struct {
    /// If ArrayBuffer-like objects contain ascii text, the buffer is printed as a string.
    /// Set true in the error printer so that ShellError prints a more readable message.
    format_buffer_as_text: bool = false,
+    multiline_strings: bool = false,

    pub fn deinit(this: *Formatter) void {
        if (bun.take(&this.map_node)) |node| {
@@ -2111,12 +2114,36 @@ pub const Formatter = struct {
                    defer if (comptime enable_ansi_colors)
                        writer.writeAll(Output.prettyFmt("<r>", true));

-                    if (str.isUTF16()) {
-                        try this.printAs(.JSON, Writer, writer_, value, .StringObject, enable_ansi_colors);
-                        return;
-                    }
+                    switch (str.isUTF16()) { // one arm per storage width of the string
+                        inline else => |isUTF16| { // `inline else` makes isUTF16 comptime-known inside the arm
+                            const encoding: bun.strings.Encoding = comptime if (isUTF16) .utf16 else .latin1;
+                            const slice = if (isUTF16) str.utf16() else str.latin1();

-                    JSPrinter.writeJSONString(str.latin1(), Writer, writer_, .latin1) catch unreachable;
+                            if (this.multiline_strings and std.mem.indexOfScalar(encoding.Unit(), slice, '\n') != null) { // opt-in path, taken only when the string contains a newline
+                                writer.writeAll("\"");
+                                var lines = std.mem.splitScalar(encoding.Unit(), slice, '\n');
+
+                                if (lines.next()) |line| { // first segment stays on the line of the opening quote
+                                    JSPrinter.writePreQuotedString(encoding, line, Writer, writer_, '"', false, true) catch {}; // NOTE(review): any error from the escaper is discarded here
+                                }
+
+                                while (lines.next()) |line| { // each later segment: real newline, current indent, then the escaped text
+                                    writer.writeAll("\n");
+                                    this.writeIndent(Writer, writer_) catch {}; // NOTE(review): error discarded
+                                    JSPrinter.writePreQuotedString(encoding, line, Writer, writer_, '"', false, true) catch {}; // NOTE(review): error discarded
+                                }
+                                writer.writeAll("\"");
+                            } else {
+                                if (isUTF16) { // UTF-16 strings are still handed to the .JSON printer, as before this change
+                                    try this.printAs(.JSON, Writer, writer_, value, .StringObject, enable_ansi_colors);
+                                    return;
+                                }
+                                writer.writeAll("\""); // latin1: opening quote, escaped body, closing quote
+                                JSPrinter.writePreQuotedString(encoding, @ptrCast(slice), Writer, writer_, '"', false, true) catch {}; // NOTE(review): replaces `catch unreachable`; error now discarded
+                                writer.writeAll("\"");
+                            }
+                        },
+                    }

                    return;
                }
--- a/src/bun.js/RuntimeTranspilerCache.zig
+++ b/src/bun.js/RuntimeTranspilerCache.zig
@@ -12,7 +12,8 @@
 /// Version 13: Hoist `import.meta.require` definition, see #15738
 /// Version 14: Updated global defines table list.
 /// Version 15: Updated global defines table list.
-const expected_version = 15;
+/// Version 16: Emojis in strings are preserved in unicode output (node/web)
+const expected_version = 16;

 const debug = Output.scoped(.cache, false);
 const MINIMUM_CACHE_SIZE = 50 * 1024;
--- a/src/bun.js/test/snapshot.zig
+++ b/src/bun.js/test/snapshot.zig
@@ -422,7 +422,7 @@ pub const Snapshots = struct {
                if (needs_pre_comma) try result_text.appendSlice(", ");
                const result_text_writer = result_text.writer();
                try result_text.appendSlice("`");
-                try bun.js_printer.writePreQuotedString(re_indented, @TypeOf(result_text_writer), result_text_writer, '`', false, false, .utf8);
+                try bun.js_printer.writePreQuotedString(.utf8, re_indented, @TypeOf(result_text_writer), result_text_writer, '`', false, false);
                try result_text.appendSlice("`");

                if (ils.is_added) Jest.runner.?.snapshots.added += 1;
--- a/src/bundler/HTMLImportManifest.zig
+++ b/src/bundler/HTMLImportManifest.zig
@@ -101,7 +101,7 @@ pub fn writeEscapedJSON(index: u32, graph: *const Graph, linker_graph: *const Li
    var bytes = std.ArrayList(u8).init(allocator);
    defer bytes.deinit();
    try write(index, graph, linker_graph, chunks, bytes.writer());
-    try bun.js_printer.writePreQuotedString(bytes.items, @TypeOf(writer), writer, '"', false, true, .utf8);
+    try bun.js_printer.writePreQuotedString(.utf8, bytes.items, @TypeOf(writer), writer, '"', false, true);
 }

 fn escapedJSONFormatter(this: HTMLImportManifest, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) bun.OOM!void {
--- a/src/fmt.zig
+++ b/src/fmt.zig
@@ -231,7 +231,7 @@ const JSONFormatterUTF8 = struct {
        if (self.opts.quote) {
            try bun.js_printer.writeJSONString(self.input, @TypeOf(writer), writer, .utf8);
        } else {
-            try bun.js_printer.writePreQuotedString(self.input, @TypeOf(writer), writer, '"', false, true, .utf8);
+            try bun.js_printer.writePreQuotedString(.utf8, self.input, @TypeOf(writer), writer, '"', false, true);
        }
    }
 };
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -151,19 +151,19 @@ pub fn quoteForJSON(text: []const u8, output_: MutableString, comptime ascii_onl
    return bytes;
 }

-pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool, comptime encoding: strings.Encoding) !void {
-    const text = if (comptime encoding == .utf16) @as([]const u16, @alignCast(std.mem.bytesAsSlice(u16, text_in))) else text_in;
+pub fn writePreQuotedString(comptime encoding: strings.Encoding, text: []const encoding.Unit(), comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool) !void {
    if (comptime json and quote_char != '"') @compileError("for json, quote_char must be '\"'");
    var i: usize = 0;
    const n: usize = text.len;
    while (i < n) {
-        const width = switch (comptime encoding) {
+        const width: u8 = switch (comptime encoding) {
            .latin1, .ascii => 1,
            .utf8 => strings.wtf8ByteSequenceLengthWithInvalid(text[i]),
-            .utf16 => 1,
+            .utf16 => if (text[i] >= strings.HIGH_SURROGATE_START and text[i] <= strings.HIGH_SURROGATE_END and text.len > i + 1 and //
+                text[i + 1] >= strings.LOW_SURROGATE_START and text[i + 1] <= strings.LOW_SURROGATE_END) 2 else 1,
        };
        const clamped_width = @min(@as(usize, width), n -| i);
-        const c = switch (encoding) {
+        const c: i32 = switch (encoding) {
            .utf8 => strings.decodeWTF8RuneT(
                &switch (clamped_width) {
                    // 0 is not returned by `wtf8ByteSequenceLengthWithInvalid`
@@ -177,20 +177,9 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
                i32,
                0,
            ),
-            .ascii => brk: {
-                std.debug.assert(text[i] <= 0x7F);
-                break :brk text[i];
-            },
-            .latin1 => brk: {
-                if (text[i] <= 0x7F) break :brk text[i];
-                break :brk strings.latin1ToCodepointAssumeNotASCII(text[i], i32);
-            },
-            .utf16 => brk: {
-                // TODO: if this is a part of a surrogate pair, we could parse the whole codepoint in order
-                // to emit it as a single \u{result} rather than two paired \uLOW\uHIGH.
-                // eg: "\u{10334}" will convert to "\uD800\uDF34" without this.
-                break :brk @as(i32, text[i]);
-            },
+            .ascii, .latin1 => text[i],
+            .utf16 => if (text[i] >= strings.HIGH_SURROGATE_START and text[i] <= strings.HIGH_SURROGATE_END and text.len > i + 1 and //
+                text[i + 1] >= strings.LOW_SURROGATE_START and text[i + 1] <= strings.LOW_SURROGATE_END) @bitCast(strings.utf16DecodeSurrogatePair(text[i], text[i + 1])) else text[i],
        };
        if (canPrintWithoutEscape(i32, c, ascii_only)) {
            const remain = text[i + clamped_width ..];
@@ -301,7 +290,7 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
            },

            else => {
-                i += @as(usize, width);
+                i += @as(usize, clamped_width);

                if (c <= 0xFF and !json) {
                    const k = @as(usize, @intCast(c));
@@ -352,13 +341,13 @@ pub fn quoteForJSONBuffer(text: []const u8, bytes: *MutableString, comptime asci

    try bytes.growIfNeeded(estimateLengthForUTF8(text, ascii_only, '"'));
    try bytes.appendChar('"');
-    try writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, true, .utf8);
+    try writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '"', ascii_only, true);
    bytes.appendChar('"') catch unreachable;
 }

 pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, comptime encoding: strings.Encoding) !void {
    try writer.writeAll("\"");
-    try writePreQuotedString(input, Writer, writer, '"', false, true, encoding);
+    try writePreQuotedString(encoding, input, Writer, writer, '"', false, true); // NOTE(review): input is []const u8 but the callee takes []const encoding.Unit(); encoding == .utf16 (Unit() == u16) would no longer type-check here
    try writer.writeAll("\"");
 }

@@ -1553,20 +1542,18 @@ fn NewPrinter(
        pub fn printStringCharactersUTF8(e: *Printer, text: []const u8, quote: u8) void {
            const writer = e.writer.stdWriter();
            (switch (quote) {
-                '\'' => writePreQuotedString(text, @TypeOf(writer), writer, '\'', ascii_only, false, .utf8),
-                '"' => writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, false, .utf8),
-                '`' => writePreQuotedString(text, @TypeOf(writer), writer, '`', ascii_only, false, .utf8),
+                '\'' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '\'', ascii_only, false), // quote_char is a comptime parameter of the callee, so each runtime quote value needs its own call
+                '"' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '"', ascii_only, false),
+                '`' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '`', ascii_only, false),
                else => unreachable,
            }) catch |err| switch (err) {};
        }
        pub fn printStringCharactersUTF16(e: *Printer, text: []const u16, quote: u8) void {
-            const slice = std.mem.sliceAsBytes(text);
-
            const writer = e.writer.stdWriter();
            (switch (quote) {
-                '\'' => writePreQuotedString(slice, @TypeOf(writer), writer, '\'', ascii_only, false, .utf16),
-                '"' => writePreQuotedString(slice, @TypeOf(writer), writer, '"', ascii_only, false, .utf16),
-                '`' => writePreQuotedString(slice, @TypeOf(writer), writer, '`', ascii_only, false, .utf16),
+                '\'' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '\'', ascii_only, false), // the []const u16 slice is passed as-is; no byte view is needed now that the callee takes []const encoding.Unit()
+                '"' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '"', ascii_only, false),
+                '`' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '`', ascii_only, false),
                else => unreachable,
            }) catch |err| switch (err) {};
        }
--- a/src/string/immutable.zig
+++ b/src/string/immutable.zig
@@ -8,6 +8,15 @@ pub const Encoding = enum {
    utf8,
    latin1,
    utf16,
+
+    /// Code-unit type of a slice stored in this encoding:
+    /// `u16` for `.utf16`, `u8` for `.ascii`, `.utf8` and `.latin1`.
+    pub fn Unit(comptime self: @This()) type {
+        return switch (self) {
+            .utf16 => u16,
+            .ascii, .utf8, .latin1 => u8,
+        };
+    }
 };

 /// Returned by classification functions that do not discriminate between utf8 and ascii.
@@ -2117,7 +2126,7 @@ fn QuoteEscapeFormat(comptime flags: QuoteEscapeFormatFlags) type {
        data: []const u8,

        pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
-            try bun.js_printer.writePreQuotedString(self.data, @TypeOf(writer), writer, flags.quote_char, false, flags.json, flags.str_encoding);
+            try bun.js_printer.writePreQuotedString(flags.str_encoding, self.data, @TypeOf(writer), writer, flags.quote_char, false, flags.json);
        }
    };
 }
@@ -2262,7 +2271,6 @@ pub const eqlUtf16 = unicode.eqlUtf16;
 pub const isAllASCII = unicode.isAllASCII;
 pub const isValidUTF8 = unicode.isValidUTF8;
 pub const isValidUTF8WithoutSIMD = unicode.isValidUTF8WithoutSIMD;
-pub const latin1ToCodepointAssumeNotASCII = unicode.latin1ToCodepointAssumeNotASCII;
 pub const latin1ToCodepointBytesAssumeNotASCII = unicode.latin1ToCodepointBytesAssumeNotASCII;
 pub const latin1ToCodepointBytesAssumeNotASCII16 = unicode.latin1ToCodepointBytesAssumeNotASCII16;
 pub const literal = unicode.literal;
@@ -2289,6 +2297,11 @@ pub const u16Trail = unicode.u16Trail;
 pub const utf16Codepoint = unicode.utf16Codepoint;
 pub const utf16CodepointWithFFFD = unicode.utf16CodepointWithFFFD;
 pub const utf16EqlString = unicode.utf16EqlString;
+pub const utf16DecodeSurrogatePair = unicode.utf16DecodeSurrogatePair;
+pub const HIGH_SURROGATE_START = unicode.HIGH_SURROGATE_START;
+pub const HIGH_SURROGATE_END = unicode.HIGH_SURROGATE_END;
+pub const LOW_SURROGATE_START = unicode.LOW_SURROGATE_START;
+pub const LOW_SURROGATE_END = unicode.LOW_SURROGATE_END;
 pub const utf8ByteSequenceLength = unicode.utf8ByteSequenceLength;
 pub const utf8ByteSequenceLengthUnsafe = unicode.utf8ByteSequenceLengthUnsafe;
 pub const w = unicode.w;
--- a/src/string/immutable/unicode.zig
+++ b/src/string/immutable/unicode.zig
@@ -1374,6 +1374,17 @@ pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement
    return utf16CodepointWithFFFDAndFirstInputChar(Type, input[0], input);
 }

+pub const HIGH_SURROGATE_START = 0xD800; // first high (lead) surrogate code unit
+pub const HIGH_SURROGATE_END = 0xDBFF; // last high (lead) surrogate code unit
+pub const LOW_SURROGATE_START = 0xDC00; // first low (trail) surrogate code unit
+pub const LOW_SURROGATE_END = 0xDFFF; // last low (trail) surrogate code unit
+
+pub fn utf16DecodeSurrogatePair(a: u32, b: u32) u32 { // a = high half, b = low half; returns the combined code point
+    bun.assert(a >= HIGH_SURROGATE_START and a <= HIGH_SURROGATE_END); // caller must already have range-checked both halves
+    bun.assert(b >= LOW_SURROGATE_START and b <= LOW_SURROGATE_END);
+    return 0x10000 + (((a & 0x03ff) << 10) | (b & 0x03ff)); // low 10 bits of each half joined into 20 bits, then offset by 0x10000
+}
+
 fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.Elem(Type), input: Type) UTF16Replacement {
    const c0 = @as(u21, char);

@@ -1401,7 +1412,7 @@ fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.E
            };
        // return error.ExpectedSecondSurrogateHalf;

-        return .{ .len = 2, .code_point = 0x10000 + (((c0 & 0x03ff) << 10) | (c1 & 0x03ff)) };
+        return .{ .len = 2, .code_point = utf16DecodeSurrogatePair(c0, c1) };
    } else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) {
        // return error.UnexpectedSecondSurrogateHalf;
        return .{ .fail = true, .len = 1, .code_point = unicode_replacement };
@@ -1629,13 +1640,6 @@ pub fn convertUTF16toUTF8InBuffer(
    return buf[0..result];
 }

-pub fn latin1ToCodepointAssumeNotASCII(char: u8, comptime CodePointType: type) CodePointType {
-    return @as(
-        CodePointType,
-        @intCast(latin1ToCodepointBytesAssumeNotASCII16(char)),
-    );
-}
-
 const latin1_to_utf16_conversion_table = [256]u16{
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
    0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
--- a/test/bundler/bundler_string.test.ts
+++ b/test/bundler/bundler_string.test.ts
@@ -1,4 +1,4 @@
-import { describe } from "bun:test";
+import { describe, expect } from "bun:test";
 import { dedent, itBundled } from "./expectBundled";

 interface TemplateStringTest {
@@ -89,6 +89,20 @@ const templateStringTests: Record<string, TemplateStringTest> = {
 };

 describe("bundler", () => {
+  // Test for emoji output in bun build
+  itBundled("string/EmojiDirectOutput", {
+    files: {
+      "a.js": `console.log("😀");`,
+    },
+    outfile: "out.js",
+    onAfterBundle(api) {
+      const content = api.readFile("out.js");
+      expect(content).toContain("😀");
+      expect(content).not.toContain("\\ud83d");
+      expect(content).not.toContain("\\ude00");
+    },
+  });
+
  for (const key in templateStringTests) {
    const test = templateStringTests[key];
    if ([test.capture, test.captureRaw, test.print].filter(x => x !== undefined).length !== 1) {
--- a/test/js/bun/test/printing/consolelog.fixture.ts
+++ b/test/js/bun/test/printing/consolelog.fixture.ts
@@ -0,0 +1,6 @@
+console.log("--- begin ---");
+console.log({
+  a: "a",
+  multiline: 'pub fn main() !void {\n    std.log.info("Hello, {s}", .{name});\n}',
+});
+console.log("--- end ---");
--- a/test/js/bun/test/printing/consolelogexample.test.ts
+++ b/test/js/bun/test/printing/consolelogexample.test.ts
@@ -0,0 +1,36 @@
+import { expect, test } from "bun:test";
+import { bunEnv, bunExe } from "harness";
+
+test("Bun.inspect", async () => {
+  expect(Bun.inspect("abc\ndef\nghi")).toMatchInlineSnapshot(`""abc\\ndef\\nghi""`);
+  expect(Bun.inspect({ a: "abc\ndef\nghi" })).toMatchInlineSnapshot(`
+    "{
+      a: "abc\\ndef\\nghi",
+    }"
+  `);
+});
+
+test("console.log output", async () => {
+  const result = Bun.spawn({
+    cmd: [bunExe(), import.meta.dir + "/consolelog.fixture.ts"],
+    stdio: ["inherit", "pipe", "pipe"],
+    env: {
+      ...bunEnv,
+      FORCE_COLOR: "0",
+    },
+  });
+  // Drain both pipes before waiting on exit, so a child that fills a pipe buffer can never stall the test.
+  const [stdout, stderr] = await Promise.all([result.stdout.text(), result.stderr.text()]);
+  await result.exited;
+  expect(stderr).toBe("");
+  expect(result.exitCode).toBe(0);
+  expect(stdout).toMatchInlineSnapshot(`
+    "--- begin ---
+    {
+      a: "a",
+      multiline: "pub fn main() !void {\\n    std.log.info(\\"Hello, {s}\\", .{name});\\n}",
+    }
+    --- end ---
+    "
+  `);
+});
--- a/test/js/bun/test/printing/stringsnapshot.test.ts
+++ b/test/js/bun/test/printing/stringsnapshot.test.ts
@@ -0,0 +1,37 @@
+test("string inline snapshots", () => {
+  expect("inline").toMatchInlineSnapshot(`"inline"`);
+  expect("multi\nline").toMatchInlineSnapshot(`
+    "multi
+    line"
+  `);
+  expect({ key: "inline" }).toMatchInlineSnapshot(`
+    {
+      "key": "inline",
+    }
+  `);
+  expect({ key: "multi\nline", value: "inline" }).toMatchInlineSnapshot(`
+    {
+      "key": 
+    "multi
+    line"
+    ,
+      "value": "inline",
+    }
+  `);
+});
+
+test("bun inspect strings", () => {
+  expect(Bun.inspect("inline")).toMatchInlineSnapshot(`""inline""`);
+  expect(Bun.inspect("multi\nline")).toMatchInlineSnapshot(`""multi\\nline""`);
+  expect(Bun.inspect({ key: "inline" })).toMatchInlineSnapshot(`
+    "{
+      key: "inline",
+    }"
+  `);
+  expect(Bun.inspect({ key: "multi\nline", value: "inline" })).toMatchInlineSnapshot(`
+    "{
+      key: "multi\\nline",
+      value: "inline",
+    }"
+  `);
+});