Compare commits

...

1 Commits

Author SHA1 Message Date
pfg
1932d440df apply changes 2025-07-28 21:56:23 -07:00
12 changed files with 175 additions and 50 deletions

View File

@@ -693,6 +693,7 @@ pub const FormatOptions = struct {
single_line: bool = false,
default_indent: u16 = 0,
error_display_level: ErrorDisplayLevel = .full,
multiline_strings: bool = false,
pub const ErrorDisplayLevel = enum {
normal,
warn,
@@ -824,6 +825,7 @@ pub fn format2(
.stack_check = bun.StackCheck.init(),
.can_throw_stack_overflow = true,
.error_display_level = options.error_display_level,
.multiline_strings = options.multiline_strings,
};
defer fmt.deinit();
const tag = try ConsoleObject.Formatter.Tag.get(vals[0], global);
@@ -995,6 +997,7 @@ pub const Formatter = struct {
/// If ArrayBuffer-like objects contain ascii text, the buffer is printed as a string.
/// Set true in the error printer so that ShellError prints a more readable message.
format_buffer_as_text: bool = false,
multiline_strings: bool = false,
pub fn deinit(this: *Formatter) void {
if (bun.take(&this.map_node)) |node| {
@@ -2111,12 +2114,36 @@ pub const Formatter = struct {
defer if (comptime enable_ansi_colors)
writer.writeAll(Output.prettyFmt("<r>", true));
if (str.isUTF16()) {
try this.printAs(.JSON, Writer, writer_, value, .StringObject, enable_ansi_colors);
return;
}
switch (str.isUTF16()) {
inline else => |isUTF16| {
const encoding: bun.strings.Encoding = comptime if (isUTF16) .utf16 else .latin1;
const slice = if (isUTF16) str.utf16() else str.latin1();
JSPrinter.writeJSONString(str.latin1(), Writer, writer_, .latin1) catch unreachable;
if (this.multiline_strings and std.mem.indexOfScalar(encoding.Unit(), slice, '\n') != null) {
writer.writeAll("\"");
var lines = std.mem.splitScalar(encoding.Unit(), slice, '\n');
if (lines.next()) |line| {
JSPrinter.writePreQuotedString(encoding, line, Writer, writer_, '"', false, true) catch {};
}
while (lines.next()) |line| {
writer.writeAll("\n");
this.writeIndent(Writer, writer_) catch {};
JSPrinter.writePreQuotedString(encoding, line, Writer, writer_, '"', false, true) catch {};
}
writer.writeAll("\"");
} else {
if (isUTF16) {
try this.printAs(.JSON, Writer, writer_, value, .StringObject, enable_ansi_colors);
return;
}
writer.writeAll("\"");
JSPrinter.writePreQuotedString(encoding, @ptrCast(slice), Writer, writer_, '"', false, true) catch {};
writer.writeAll("\"");
}
},
}
return;
}

View File

@@ -12,7 +12,8 @@
/// Version 13: Hoist `import.meta.require` definition, see #15738
/// Version 14: Updated global defines table list.
/// Version 15: Updated global defines table list.
const expected_version = 15;
/// Version 16: Emojis in strings are preserved in unicode output (node/web)
const expected_version = 16;
const debug = Output.scoped(.cache, false);
const MINIMUM_CACHE_SIZE = 50 * 1024;

View File

@@ -422,7 +422,7 @@ pub const Snapshots = struct {
if (needs_pre_comma) try result_text.appendSlice(", ");
const result_text_writer = result_text.writer();
try result_text.appendSlice("`");
try bun.js_printer.writePreQuotedString(re_indented, @TypeOf(result_text_writer), result_text_writer, '`', false, false, .utf8);
try bun.js_printer.writePreQuotedString(.utf8, re_indented, @TypeOf(result_text_writer), result_text_writer, '`', false, false);
try result_text.appendSlice("`");
if (ils.is_added) Jest.runner.?.snapshots.added += 1;

View File

@@ -101,7 +101,7 @@ pub fn writeEscapedJSON(index: u32, graph: *const Graph, linker_graph: *const Li
var bytes = std.ArrayList(u8).init(allocator);
defer bytes.deinit();
try write(index, graph, linker_graph, chunks, bytes.writer());
try bun.js_printer.writePreQuotedString(bytes.items, @TypeOf(writer), writer, '"', false, true, .utf8);
try bun.js_printer.writePreQuotedString(.utf8, bytes.items, @TypeOf(writer), writer, '"', false, true);
}
fn escapedJSONFormatter(this: HTMLImportManifest, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) bun.OOM!void {

View File

@@ -231,7 +231,7 @@ const JSONFormatterUTF8 = struct {
if (self.opts.quote) {
try bun.js_printer.writeJSONString(self.input, @TypeOf(writer), writer, .utf8);
} else {
try bun.js_printer.writePreQuotedString(self.input, @TypeOf(writer), writer, '"', false, true, .utf8);
try bun.js_printer.writePreQuotedString(.utf8, self.input, @TypeOf(writer), writer, '"', false, true);
}
}
};

View File

@@ -151,19 +151,19 @@ pub fn quoteForJSON(text: []const u8, output_: MutableString, comptime ascii_onl
return bytes;
}
pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool, comptime encoding: strings.Encoding) !void {
const text = if (comptime encoding == .utf16) @as([]const u16, @alignCast(std.mem.bytesAsSlice(u16, text_in))) else text_in;
pub fn writePreQuotedString(comptime encoding: strings.Encoding, text: []const encoding.Unit(), comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool) !void {
if (comptime json and quote_char != '"') @compileError("for json, quote_char must be '\"'");
var i: usize = 0;
const n: usize = text.len;
while (i < n) {
const width = switch (comptime encoding) {
const width: u8 = switch (comptime encoding) {
.latin1, .ascii => 1,
.utf8 => strings.wtf8ByteSequenceLengthWithInvalid(text[i]),
.utf16 => 1,
.utf16 => if (text[i] >= strings.HIGH_SURROGATE_START and text[i] <= strings.HIGH_SURROGATE_END and text.len > i + 1 and //
text[i + 1] >= strings.LOW_SURROGATE_START and text[i + 1] <= strings.LOW_SURROGATE_END) 2 else 1,
};
const clamped_width = @min(@as(usize, width), n -| i);
const c = switch (encoding) {
const c: i32 = switch (encoding) {
.utf8 => strings.decodeWTF8RuneT(
&switch (clamped_width) {
// 0 is not returned by `wtf8ByteSequenceLengthWithInvalid`
@@ -177,20 +177,9 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
i32,
0,
),
.ascii => brk: {
std.debug.assert(text[i] <= 0x7F);
break :brk text[i];
},
.latin1 => brk: {
if (text[i] <= 0x7F) break :brk text[i];
break :brk strings.latin1ToCodepointAssumeNotASCII(text[i], i32);
},
.utf16 => brk: {
// TODO: if this is a part of a surrogate pair, we could parse the whole codepoint in order
// to emit it as a single \u{result} rather than two paired \uLOW\uHIGH.
// eg: "\u{10334}" will convert to "\uD800\uDF34" without this.
break :brk @as(i32, text[i]);
},
.ascii, .latin1 => text[i],
.utf16 => if (text[i] >= strings.HIGH_SURROGATE_START and text[i] <= strings.HIGH_SURROGATE_END and text.len > i + 1 and //
text[i + 1] >= strings.LOW_SURROGATE_START and text[i + 1] <= strings.LOW_SURROGATE_END) @bitCast(strings.utf16DecodeSurrogatePair(text[i], text[i + 1])) else text[i],
};
if (canPrintWithoutEscape(i32, c, ascii_only)) {
const remain = text[i + clamped_width ..];
@@ -301,7 +290,7 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
},
else => {
i += @as(usize, width);
i += @as(usize, clamped_width);
if (c <= 0xFF and !json) {
const k = @as(usize, @intCast(c));
@@ -352,13 +341,13 @@ pub fn quoteForJSONBuffer(text: []const u8, bytes: *MutableString, comptime asci
try bytes.growIfNeeded(estimateLengthForUTF8(text, ascii_only, '"'));
try bytes.appendChar('"');
try writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, true, .utf8);
try writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '"', ascii_only, true);
bytes.appendChar('"') catch unreachable;
}
pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, comptime encoding: strings.Encoding) !void {
try writer.writeAll("\"");
try writePreQuotedString(input, Writer, writer, '"', false, true, encoding);
try writePreQuotedString(encoding, input, Writer, writer, '"', false, true);
try writer.writeAll("\"");
}
@@ -1553,20 +1542,18 @@ fn NewPrinter(
pub fn printStringCharactersUTF8(e: *Printer, text: []const u8, quote: u8) void {
const writer = e.writer.stdWriter();
(switch (quote) {
'\'' => writePreQuotedString(text, @TypeOf(writer), writer, '\'', ascii_only, false, .utf8),
'"' => writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, false, .utf8),
'`' => writePreQuotedString(text, @TypeOf(writer), writer, '`', ascii_only, false, .utf8),
'\'' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '\'', ascii_only, false),
'"' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '"', ascii_only, false),
'`' => writePreQuotedString(.utf8, text, @TypeOf(writer), writer, '`', ascii_only, false),
else => unreachable,
}) catch |err| switch (err) {};
}
pub fn printStringCharactersUTF16(e: *Printer, text: []const u16, quote: u8) void {
const slice = std.mem.sliceAsBytes(text);
const writer = e.writer.stdWriter();
(switch (quote) {
'\'' => writePreQuotedString(slice, @TypeOf(writer), writer, '\'', ascii_only, false, .utf16),
'"' => writePreQuotedString(slice, @TypeOf(writer), writer, '"', ascii_only, false, .utf16),
'`' => writePreQuotedString(slice, @TypeOf(writer), writer, '`', ascii_only, false, .utf16),
'\'' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '\'', ascii_only, false),
'"' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '"', ascii_only, false),
'`' => writePreQuotedString(.utf16, text, @TypeOf(writer), writer, '`', ascii_only, false),
else => unreachable,
}) catch |err| switch (err) {};
}

View File

@@ -8,6 +8,15 @@ pub const Encoding = enum {
utf8,
latin1,
utf16,
/// Returns the code-unit type for slices in this encoding:
/// `u16` for `.utf16`, and `u8` for `.ascii`, `.utf8`, and `.latin1`.
pub fn Unit(comptime self: @This()) type {
    return switch (self) {
        .utf16 => u16,
        .ascii, .utf8, .latin1 => u8,
    };
}
};
/// Returned by classification functions that do not discriminate between utf8 and ascii.
@@ -2117,7 +2126,7 @@ fn QuoteEscapeFormat(comptime flags: QuoteEscapeFormatFlags) type {
data: []const u8,
pub fn format(self: @This(), comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
try bun.js_printer.writePreQuotedString(self.data, @TypeOf(writer), writer, flags.quote_char, false, flags.json, flags.str_encoding);
try bun.js_printer.writePreQuotedString(flags.str_encoding, self.data, @TypeOf(writer), writer, flags.quote_char, false, flags.json);
}
};
}
@@ -2262,7 +2271,6 @@ pub const eqlUtf16 = unicode.eqlUtf16;
pub const isAllASCII = unicode.isAllASCII;
pub const isValidUTF8 = unicode.isValidUTF8;
pub const isValidUTF8WithoutSIMD = unicode.isValidUTF8WithoutSIMD;
pub const latin1ToCodepointAssumeNotASCII = unicode.latin1ToCodepointAssumeNotASCII;
pub const latin1ToCodepointBytesAssumeNotASCII = unicode.latin1ToCodepointBytesAssumeNotASCII;
pub const latin1ToCodepointBytesAssumeNotASCII16 = unicode.latin1ToCodepointBytesAssumeNotASCII16;
pub const literal = unicode.literal;
@@ -2289,6 +2297,11 @@ pub const u16Trail = unicode.u16Trail;
pub const utf16Codepoint = unicode.utf16Codepoint;
pub const utf16CodepointWithFFFD = unicode.utf16CodepointWithFFFD;
pub const utf16EqlString = unicode.utf16EqlString;
pub const utf16DecodeSurrogatePair = unicode.utf16DecodeSurrogatePair;
pub const HIGH_SURROGATE_START = unicode.HIGH_SURROGATE_START;
pub const HIGH_SURROGATE_END = unicode.HIGH_SURROGATE_END;
pub const LOW_SURROGATE_START = unicode.LOW_SURROGATE_START;
pub const LOW_SURROGATE_END = unicode.LOW_SURROGATE_END;
pub const utf8ByteSequenceLength = unicode.utf8ByteSequenceLength;
pub const utf8ByteSequenceLengthUnsafe = unicode.utf8ByteSequenceLengthUnsafe;
pub const w = unicode.w;

View File

@@ -1374,6 +1374,17 @@ pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement
return utf16CodepointWithFFFDAndFirstInputChar(Type, input[0], input);
}
/// Lowest UTF-16 code unit accepted as a high (leading) surrogate, inclusive.
pub const HIGH_SURROGATE_START = 0xD800;
/// Highest UTF-16 code unit accepted as a high (leading) surrogate, inclusive.
pub const HIGH_SURROGATE_END = 0xDBFF;
/// Lowest UTF-16 code unit accepted as a low (trailing) surrogate, inclusive.
pub const LOW_SURROGATE_START = 0xDC00;
/// Highest UTF-16 code unit accepted as a low (trailing) surrogate, inclusive.
pub const LOW_SURROGATE_END = 0xDFFF;
/// Combines a UTF-16 surrogate pair into the single code point it encodes.
///
/// `a` must lie in the high-surrogate range and `b` in the low-surrogate
/// range; both preconditions are checked with `bun.assert`.
/// The result is `0x10000` plus the 20-bit value formed from the low ten
/// bits of `a` (upper half) and the low ten bits of `b` (lower half).
pub fn utf16DecodeSurrogatePair(a: u32, b: u32) u32 {
    bun.assert(HIGH_SURROGATE_START <= a and a <= HIGH_SURROGATE_END);
    bun.assert(LOW_SURROGATE_START <= b and b <= LOW_SURROGATE_END);
    const upper_ten_bits = (a & 0x03ff) << 10;
    const lower_ten_bits = b & 0x03ff;
    return 0x10000 + (upper_ten_bits | lower_ten_bits);
}
fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.Elem(Type), input: Type) UTF16Replacement {
const c0 = @as(u21, char);
@@ -1401,7 +1412,7 @@ fn utf16CodepointWithFFFDAndFirstInputChar(comptime Type: type, char: std.meta.E
};
// return error.ExpectedSecondSurrogateHalf;
return .{ .len = 2, .code_point = 0x10000 + (((c0 & 0x03ff) << 10) | (c1 & 0x03ff)) };
return .{ .len = 2, .code_point = utf16DecodeSurrogatePair(c0, c1) };
} else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) {
// return error.UnexpectedSecondSurrogateHalf;
return .{ .fail = true, .len = 1, .code_point = unicode_replacement };
@@ -1629,13 +1640,6 @@ pub fn convertUTF16toUTF8InBuffer(
return buf[0..result];
}
/// Looks up `char` with `latin1ToCodepointBytesAssumeNotASCII16` and converts
/// the result to `CodePointType` with `@intCast`.
/// NOTE(review): the name suggests callers must pass a non-ASCII byte — confirm
/// against the helper's contract.
pub fn latin1ToCodepointAssumeNotASCII(char: u8, comptime CodePointType: type) CodePointType {
    const code_unit = latin1ToCodepointBytesAssumeNotASCII16(char);
    return @intCast(code_unit);
}
const latin1_to_utf16_conversion_table = [256]u16{
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F

View File

@@ -1,4 +1,4 @@
import { describe } from "bun:test";
import { describe, expect } from "bun:test";
import { dedent, itBundled } from "./expectBundled";
interface TemplateStringTest {
@@ -89,6 +89,20 @@ const templateStringTests: Record<string, TemplateStringTest> = {
};
describe("bundler", () => {
// Bundling a string literal that contains an emoji must keep the emoji itself
// in the output file rather than its escaped surrogate halves.
itBundled("string/EmojiDirectOutput", {
  files: {
    "a.js": `console.log("😀");`,
  },
  outfile: "out.js",
  onAfterBundle(api) {
    const bundled = api.readFile("out.js");
    expect(bundled).toContain("😀");
    // Neither half of the escaped surrogate pair may appear in the bundle.
    for (const escapedHalf of ["\\ud83d", "\\ude00"]) {
      expect(bundled).not.toContain(escapedHalf);
    }
  },
});
for (const key in templateStringTests) {
const test = templateStringTests[key];
if ([test.capture, test.captureRaw, test.print].filter(x => x !== undefined).length !== 1) {

View File

@@ -0,0 +1,6 @@
// Fixture script: prints an object holding a short string and a string with
// embedded newlines, bracketed by begin/end marker lines.
// Presumably this is the consolelog.fixture.ts spawned by the "console.log output"
// test, which snapshots this script's stdout — confirm the file name.
console.log("--- begin ---");
console.log({
a: "a",
multiline: 'pub fn main() !void {\n std.log.info("Hello, {s}", .{name});\n}',
});
console.log("--- end ---");

View File

@@ -0,0 +1,36 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe } from "harness";
// Bun.inspect renders strings containing newlines on a single line with the
// newlines escaped, both for a bare string and for an object property value.
test("Bun.inspect", async () => {
expect(Bun.inspect("abc\ndef\nghi")).toMatchInlineSnapshot(`""abc\\ndef\\nghi""`);
expect(Bun.inspect({ a: "abc\ndef\nghi" })).toMatchInlineSnapshot(`
"{
a: "abc\\ndef\\nghi",
}"
`);
});
// Runs the console.log fixture in a child process and snapshots its stdout.
// The snapshot expects the multi-line string printed on one line with escaped
// newlines and quotes; stderr must be empty and the exit code 0.
test("console.log output", async () => {
const result = Bun.spawn({
cmd: [bunExe(), import.meta.dir + "/consolelog.fixture.ts"],
// stdin is inherited; stdout and stderr are piped so they can be read below.
stdio: ["inherit", "pipe", "pipe"],
env: {
...bunEnv,
// Presumably disables ANSI color codes so the snapshot is plain text — confirm.
FORCE_COLOR: "0",
},
});
// NOTE(review): the pipes are read only after the process has exited. This is
// fine for this small fixture, but consider awaiting stdout/stderr/exited together
// if the output could ever exceed the pipe buffer — confirm.
await result.exited;
const stdout = await result.stdout.text();
const stderr = await result.stderr.text();
expect(stderr).toBe("");
expect(result.exitCode).toBe(0);
expect(stdout).toMatchInlineSnapshot(`
"--- begin ---
{
a: "a",
multiline: "pub fn main() !void {\\n std.log.info(\\"Hello, {s}\\", .{name});\\n}",
}
--- end ---
"
`);
});

View File

@@ -0,0 +1,37 @@
// Inline snapshots of strings: a single-line string stays on one line, while a
// string containing a newline is written across real lines. Inside an object,
// the multi-line value is placed on its own lines after the key.
test("string inline snapshots", () => {
expect("inline").toMatchInlineSnapshot(`"inline"`);
expect("multi\nline").toMatchInlineSnapshot(`
"multi
line"
`);
expect({ key: "inline" }).toMatchInlineSnapshot(`
{
"key": "inline",
}
`);
expect({ key: "multi\nline", value: "inline" }).toMatchInlineSnapshot(`
{
"key":
"multi
line"
,
"value": "inline",
}
`);
});
// Bun.inspect output for the same inputs as the inline-snapshot test: strings
// with newlines stay on a single line with the newline escaped.
test("bun inspect strings", () => {
expect(Bun.inspect("inline")).toMatchInlineSnapshot(`""inline""`);
expect(Bun.inspect("multi\nline")).toMatchInlineSnapshot(`""multi\\nline""`);
expect(Bun.inspect({ key: "inline" })).toMatchInlineSnapshot(`
"{
key: "inline",
}"
`);
expect(Bun.inspect({ key: "multi\nline", value: "inline" })).toMatchInlineSnapshot(`
"{
key: "multi\\nline",
value: "inline",
}"
`);
});