Compare commits

...

8 Commits

Author SHA1 Message Date
pfg
2217bff362 Merge branch 'main' into pfg/fix-tagged-template-literal 2025-10-17 18:12:48 -07:00
pfg
4eaca8e15b Merge remote-tracking branch 'origin/main' into pfg/fix-tagged-template-literal 2025-10-06 15:37:17 -07:00
pfg
64c623e2e6 Merge branch 'main' into pfg/fix-tagged-template-literal 2025-08-15 14:31:17 -07:00
pfg
bde76b8dac Merge remote-tracking branch 'origin/main' into pfg/fix-tagged-template-literal 2025-08-04 18:42:29 -07:00
pfg
fe732b112c one more cloneUTF8 2025-07-28 15:00:21 -07:00
autofix-ci[bot]
8530c64c2c [autofix.ci] apply automated fixes 2025-07-26 04:47:42 +00:00
pfg
7cdb0bb48b add a regex regression test 2025-07-25 21:44:13 -07:00
pfg
2c6d1abd6e rebase on main 2025-07-25 21:43:51 -07:00
10 changed files with 276 additions and 171 deletions

View File

@@ -778,7 +778,7 @@ pub const AsyncModule = struct {
return ResolvedSource{
.allocator = null,
.source_code = bun.String.cloneLatin1(printer.ctx.getWritten()),
.source_code = bun.String.cloneUTF8(printer.ctx.getWritten()),
.specifier = String.init(specifier),
.source_url = String.init(path.text),
.is_commonjs_module = parse_result.ast.has_commonjs_export_names or parse_result.ast.exports_kind == .cjs,
@@ -1120,7 +1120,7 @@ pub fn transpileSourceCode(
const bytecode_slice = parse_result.already_bundled.bytecodeSlice();
return ResolvedSource{
.allocator = null,
.source_code = bun.String.cloneLatin1(source.contents),
.source_code = bun.String.cloneUTF8(source.contents),
.specifier = input_specifier,
.source_url = input_specifier.createIfDifferent(path.text),
.already_bundled = true,
@@ -1289,7 +1289,7 @@ pub fn transpileSourceCode(
.allocator = null,
.source_code = brk: {
const written = printer.ctx.getWritten();
const result = cache.output_code orelse bun.String.cloneLatin1(written);
const result = cache.output_code orelse bun.String.cloneUTF8(written);
if (written.len > 1024 * 1024 * 2 or jsc_vm.smol) {
printer.ctx.buffer.deinit();
@@ -2516,7 +2516,7 @@ pub const RuntimeTranspilerStore = struct {
const bytecode_slice = parse_result.already_bundled.bytecodeSlice();
this.resolved_source = ResolvedSource{
.allocator = null,
.source_code = bun.String.cloneLatin1(parse_result.source.contents),
.source_code = bun.String.cloneUTF8(parse_result.source.contents),
.already_bundled = true,
.bytecode_cache = if (bytecode_slice.len > 0) bytecode_slice.ptr else null,
.bytecode_cache_size = bytecode_slice.len,
@@ -2576,7 +2576,7 @@ pub const RuntimeTranspilerStore = struct {
const source_code = brk: {
const written = printer.ctx.getWritten();
const result = cache.output_code orelse bun.String.cloneLatin1(written);
const result = cache.output_code orelse bun.String.cloneUTF8(written);
if (written.len > 1024 * 1024 * 2 or vm.smol) {
printer.ctx.buffer.deinit();

View File

@@ -13,7 +13,8 @@
/// Version 14: Updated global defines table list.
/// Version 15: Updated global defines table list.
/// Version 16: Added typeof undefined minification optimization.
const expected_version = 16;
/// Version 17: Emits utf-8 files in rare cases
const expected_version = 17;
const debug = Output.scoped(.cache, .visible);
const MINIMUM_CACHE_SIZE = 50 * 1024;
@@ -631,7 +632,7 @@ pub const RuntimeTranspilerCache = struct {
return;
}
bun.assert(this.entry == null);
const output_code = bun.String.cloneLatin1(output_code_bytes);
const output_code = bun.String.cloneUTF8(output_code_bytes);
this.output_code = output_code;
toFile(this.input_byte_length.?, this.input_hash.?, this.features_hash.?, sourcemap, output_code, this.exports_kind) catch |err| {
@@ -639,7 +640,7 @@ pub const RuntimeTranspilerCache = struct {
return;
};
if (comptime bun.Environment.allow_assert)
debug("put() = {d} bytes", .{output_code.latin1().len});
debug("put() = {d} bytes", .{output_code.length()});
}
};

View File

@@ -5,9 +5,6 @@ const first_high_surrogate = 0xD800;
const first_low_surrogate = 0xDC00;
const last_low_surrogate = 0xDFFF;
/// For support JavaScriptCore
const ascii_only_always_on_unless_minifying = true;
fn formatUnsignedIntegerBetween(comptime len: u16, buf: *[len]u8, val: u64) void {
comptime var i: u16 = len;
var remainder = val;
@@ -26,11 +23,11 @@ pub fn writeModuleId(comptime Writer: type, writer: Writer, module_id: u32) void
std.fmt.formatInt(module_id, 16, .lower, .{}, writer) catch unreachable;
}
pub fn canPrintWithoutEscape(comptime CodePointType: type, c: CodePointType, comptime ascii_only: bool) bool {
pub fn canPrintWithoutEscape(comptime CodePointType: type, c: CodePointType, prefers_ascii: bool) bool {
if (c <= last_ascii) {
return c >= first_ascii and c != '\\' and c != '"' and c != '\'' and c != '`' and c != '$';
} else {
return !ascii_only and c != 0xFEFF and c != 0x2028 and c != 0x2029 and (c < first_high_surrogate or c > last_low_surrogate);
return !prefers_ascii and c != 0xFEFF and c != 0x2028 and c != 0x2029 and (c < first_high_surrogate or c > last_low_surrogate);
}
}
@@ -109,7 +106,7 @@ fn ws(comptime str: []const u8) Whitespacer {
return .{ .normal = Static.with, .minify = Static.without };
}
pub fn estimateLengthForUTF8(input: []const u8, comptime ascii_only: bool, comptime quote_char: u8) usize {
pub fn estimateLengthForUTF8(input: []const u8, comptime prefers_ascii: bool, comptime quote_char: u8) usize {
var remaining = input;
var len: usize = 2; // for quotes
@@ -130,7 +127,7 @@ pub fn estimateLengthForUTF8(input: []const u8, comptime ascii_only: bool, compt
i32,
0,
);
if (canPrintWithoutEscape(i32, c, ascii_only)) {
if (canPrintWithoutEscape(i32, c, prefers_ascii)) {
len += @as(usize, char_len);
} else if (c <= 0xFFFF) {
len += 6;
@@ -145,7 +142,7 @@ pub fn estimateLengthForUTF8(input: []const u8, comptime ascii_only: bool, compt
return len;
}
pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool, comptime encoding: strings.Encoding) !void {
pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer: Writer, comptime quote_char: u8, prefers_ascii: bool, comptime json: bool, comptime encoding: strings.Encoding) !void {
const text = if (comptime encoding == .utf16) @as([]const u16, @alignCast(std.mem.bytesAsSlice(u16, text_in))) else text_in;
if (comptime json and quote_char != '"') @compileError("for json, quote_char must be '\"'");
var i: usize = 0;
@@ -183,7 +180,7 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
break :brk @as(i32, text[i]);
},
};
if (canPrintWithoutEscape(i32, c, ascii_only)) {
if (canPrintWithoutEscape(i32, c, prefers_ascii)) {
const remain = text[i + clamped_width ..];
switch (encoding) {
@@ -338,12 +335,12 @@ pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer:
}
}
}
pub fn quoteForJSON(text: []const u8, bytes: *MutableString, comptime ascii_only: bool) !void {
pub fn quoteForJSON(text: []const u8, bytes: *MutableString, comptime prefers_ascii: bool) !void {
const writer = bytes.writer();
try bytes.growIfNeeded(estimateLengthForUTF8(text, ascii_only, '"'));
try bytes.growIfNeeded(estimateLengthForUTF8(text, prefers_ascii, '"'));
try bytes.appendChar('"');
try writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, true, .utf8);
try writePreQuotedString(text, @TypeOf(writer), writer, '"', prefers_ascii, true, .utf8);
bytes.appendChar('"') catch unreachable;
}
@@ -593,7 +590,6 @@ const ImportVariant = enum {
};
fn NewPrinter(
comptime ascii_only: bool,
comptime Writer: type,
comptime rewrite_esm_to_cjs: bool,
comptime is_bun_platform: bool,
@@ -615,6 +611,7 @@ fn NewPrinter(
prev_reg_exp_end: i32 = -1,
call_target: ?Expr.Data = null,
writer: Writer,
prefers_ascii: bool,
has_printed_bundled_import_statement: bool = false,
@@ -1524,9 +1521,9 @@ fn NewPrinter(
pub fn printStringCharactersUTF8(e: *Printer, text: []const u8, quote: u8) void {
const writer = e.writer.stdWriter();
(switch (quote) {
'\'' => writePreQuotedString(text, @TypeOf(writer), writer, '\'', ascii_only, false, .utf8),
'"' => writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, false, .utf8),
'`' => writePreQuotedString(text, @TypeOf(writer), writer, '`', ascii_only, false, .utf8),
'\'' => writePreQuotedString(text, @TypeOf(writer), writer, '\'', e.prefers_ascii, false, .utf8),
'"' => writePreQuotedString(text, @TypeOf(writer), writer, '"', e.prefers_ascii, false, .utf8),
'`' => writePreQuotedString(text, @TypeOf(writer), writer, '`', e.prefers_ascii, false, .utf8),
else => unreachable,
}) catch |err| switch (err) {};
}
@@ -1535,9 +1532,9 @@ fn NewPrinter(
const writer = e.writer.stdWriter();
(switch (quote) {
'\'' => writePreQuotedString(slice, @TypeOf(writer), writer, '\'', ascii_only, false, .utf16),
'"' => writePreQuotedString(slice, @TypeOf(writer), writer, '"', ascii_only, false, .utf16),
'`' => writePreQuotedString(slice, @TypeOf(writer), writer, '`', ascii_only, false, .utf16),
'\'' => writePreQuotedString(slice, @TypeOf(writer), writer, '\'', e.prefers_ascii, false, .utf16),
'"' => writePreQuotedString(slice, @TypeOf(writer), writer, '"', e.prefers_ascii, false, .utf16),
'`' => writePreQuotedString(slice, @TypeOf(writer), writer, '`', e.prefers_ascii, false, .utf16),
else => unreachable,
}) catch |err| switch (err) {};
}
@@ -1880,73 +1877,14 @@ fn NewPrinter(
}
}
pub inline fn canPrintIdentifierUTF16(_: *Printer, name: []const u16) bool {
if (comptime ascii_only or ascii_only_always_on_unless_minifying) {
pub inline fn canPrintIdentifierUTF16(p: *Printer, name: []const u16) bool {
if (p.prefers_ascii) {
return js_lexer.isLatin1Identifier([]const u16, name);
} else {
return js_lexer.isIdentifierUTF16(name);
}
}
fn printRawTemplateLiteral(p: *Printer, bytes: []const u8) void {
if (comptime is_json or !ascii_only) {
p.print(bytes);
return;
}
// Translate any non-ASCII to unicode escape sequences
// Note that this does not correctly handle malformed template literal strings
// template literal strings can contain invalid unicode code points
// and pretty much anything else
//
// we use WTF-8 here, but that's still not good enough.
//
var ascii_start: usize = 0;
var is_ascii = false;
var iter = CodepointIterator.init(bytes);
var cursor = CodepointIterator.Cursor{};
while (iter.next(&cursor)) {
switch (cursor.c) {
// unlike other versions, we only want to mutate > 0x7F
0...last_ascii => {
if (!is_ascii) {
ascii_start = cursor.i;
is_ascii = true;
}
},
else => {
if (is_ascii) {
p.print(bytes[ascii_start..cursor.i]);
is_ascii = false;
}
switch (cursor.c) {
0...0xFFFF => {
p.print([_]u8{
'\\',
'u',
hex_chars[cursor.c >> 12],
hex_chars[(cursor.c >> 8) & 15],
hex_chars[(cursor.c >> 4) & 15],
hex_chars[cursor.c & 15],
});
},
else => {
p.print("\\u{");
std.fmt.formatInt(cursor.c, 16, .lower, .{}, p) catch unreachable;
p.print("}");
},
}
},
}
}
if (is_ascii) {
p.print(bytes[ascii_start..]);
}
}
pub fn printExpr(p: *Printer, expr: Expr, level: Level, in_flags: ExprFlag.Set) void {
var flags = in_flags;
@@ -2772,7 +2710,10 @@ fn NewPrinter(
p.print("`");
switch (e.head) {
.raw => |raw| p.printRawTemplateLiteral(raw),
.raw => |raw| {
if (p.prefers_ascii and !strings.isAllASCII(raw)) p.prefers_ascii = false;
p.print(raw);
},
.cooked => |*cooked| {
if (cooked.isPresent()) {
cooked.resolveRopeIfNeeded(p.options.allocator);
@@ -2786,7 +2727,10 @@ fn NewPrinter(
p.printExpr(part.value, .lowest, ExprFlag.None());
p.print("}");
switch (part.tail) {
.raw => |raw| p.printRawTemplateLiteral(raw),
.raw => |raw| {
if (p.prefers_ascii and !strings.isAllASCII(raw)) p.prefers_ascii = false;
p.print(raw);
},
.cooked => |*cooked| {
if (cooked.isPresent()) {
cooked.resolveRopeIfNeeded(p.options.allocator);
@@ -3149,70 +3093,11 @@ fn NewPrinter(
p.print(" ");
}
if (comptime is_bun_platform) {
// Translate any non-ASCII to unicode escape sequences
var ascii_start: usize = 0;
var is_ascii = false;
var iter = CodepointIterator.init(e.value);
var cursor = CodepointIterator.Cursor{};
while (iter.next(&cursor)) {
switch (cursor.c) {
first_ascii...last_ascii => {
if (!is_ascii) {
ascii_start = cursor.i;
is_ascii = true;
}
},
else => {
if (is_ascii) {
p.print(e.value[ascii_start..cursor.i]);
is_ascii = false;
}
switch (cursor.c) {
0...0xFFFF => {
p.print([_]u8{
'\\',
'u',
hex_chars[cursor.c >> 12],
hex_chars[(cursor.c >> 8) & 15],
hex_chars[(cursor.c >> 4) & 15],
hex_chars[cursor.c & 15],
});
},
else => |c| {
const k = c - 0x10000;
const lo = @as(usize, @intCast(first_high_surrogate + ((k >> 10) & 0x3FF)));
const hi = @as(usize, @intCast(first_low_surrogate + (k & 0x3FF)));
p.print(&[_]u8{
'\\',
'u',
hex_chars[lo >> 12],
hex_chars[(lo >> 8) & 15],
hex_chars[(lo >> 4) & 15],
hex_chars[lo & 15],
'\\',
'u',
hex_chars[hi >> 12],
hex_chars[(hi >> 8) & 15],
hex_chars[(hi >> 4) & 15],
hex_chars[hi & 15],
});
},
}
},
}
}
if (is_ascii) {
p.print(e.value[ascii_start..]);
}
} else {
// UTF8 sequence is fine
p.print(e.value);
// RegExp literals cannot be printed ascii only because they expose a `.source` property
if (p.prefers_ascii and !strings.isAllASCII(e.value)) {
p.prefers_ascii = false;
}
p.print(e.value);
// Need a space before the next identifier to avoid it turning into flags
p.prev_reg_exp_end = p.writer.written;
@@ -4989,7 +4874,7 @@ fn NewPrinter(
}
pub fn printIdentifier(p: *Printer, identifier: string) void {
if (comptime ascii_only) {
if (p.prefers_ascii) {
p.printIdentifierAsciiOnly(identifier);
} else {
p.print(identifier);
@@ -5041,7 +4926,7 @@ fn NewPrinter(
i += 1;
}
if ((comptime ascii_only) and c > last_ascii) {
if (p.prefers_ascii and c > last_ascii) {
switch (c) {
0...0xFFFF => {
p.print(
@@ -5164,6 +5049,7 @@ fn NewPrinter(
opts: Options,
renamer: bun.renamer.Renamer,
source_map_builder: SourceMap.Chunk.Builder,
prefers_ascii: bool,
) Printer {
var printer = Printer{
.import_records = import_records,
@@ -5171,6 +5057,7 @@ fn NewPrinter(
.writer = writer,
.renamer = renamer,
.source_map_builder = source_map_builder,
.prefers_ascii = prefers_ascii,
};
if (comptime generate_source_map) {
// This seems silly to cache but the .items() function apparently costs 1ms according to Instruments.
@@ -5656,7 +5543,7 @@ pub fn printAst(
tree: Ast,
symbols: js_ast.Symbol.Map,
source: *const logger.Source,
comptime ascii_only: bool,
comptime is_bun_platform: bool,
opts: Options,
comptime generate_source_map: bool,
) !usize {
@@ -5735,11 +5622,9 @@ pub fn printAst(
}
const PrinterType = NewPrinter(
ascii_only,
Writer,
false,
// if it's ascii_only, it is also bun
ascii_only,
is_bun_platform,
false,
generate_source_map,
);
@@ -5750,7 +5635,8 @@ pub fn printAst(
tree.import_records.slice(),
opts,
renamer,
getSourceMapBuilder(if (generate_source_map) .lazy else .disable, ascii_only, opts, source, &tree),
getSourceMapBuilder(if (generate_source_map) .lazy else .disable, is_bun_platform, opts, source, &tree),
is_bun_platform,
);
defer {
if (comptime generate_source_map) {
@@ -5826,7 +5712,7 @@ pub fn printJSON(
source: *const logger.Source,
opts: Options,
) !usize {
const PrinterType = NewPrinter(false, Writer, false, false, true, false);
const PrinterType = NewPrinter(Writer, false, false, true, false);
const writer = _writer;
var s_expr = S.SExpr{ .value = expr };
const stmt = Stmt{ .loc = logger.Loc.Empty, .data = .{
@@ -5845,6 +5731,7 @@ pub fn printJSON(
opts,
renamer.toRenamer(),
undefined,
false,
);
var bin_stack_heap = std.heap.stackFallback(1024, bun.default_allocator);
printer.binary_expression_stack = std.ArrayList(PrinterType.BinaryExpressionVisitor).init(bin_stack_heap.get());
@@ -5936,8 +5823,6 @@ pub fn printWithWriterAndPlatform(
bun.crash_handler.current_action = .{ .print = source.path.text };
const PrinterType = NewPrinter(
// if it's bun, it is also ascii_only
is_bun_platform,
Writer,
false,
is_bun_platform,
@@ -5950,6 +5835,7 @@ pub fn printWithWriterAndPlatform(
opts,
renamer,
getSourceMapBuilder(if (generate_source_maps) .eager else .disable, is_bun_platform, opts, source, &ast),
is_bun_platform,
);
printer.was_lazy_export = ast.has_lazy_export;
var bin_stack_heap = std.heap.stackFallback(1024, bun.default_allocator);
@@ -6016,7 +5902,7 @@ pub fn printCommonJS(
tree: Ast,
symbols: js_ast.Symbol.Map,
source: *const logger.Source,
comptime ascii_only: bool,
comptime prefers_ascii: bool,
opts: Options,
comptime generate_source_map: bool,
) !usize {
@@ -6024,7 +5910,7 @@ pub fn printCommonJS(
defer bun.crash_handler.current_action = prev_action;
bun.crash_handler.current_action = .{ .print = source.path.text };
const PrinterType = NewPrinter(ascii_only, Writer, true, false, false, generate_source_map);
const PrinterType = NewPrinter(Writer, true, false, false, generate_source_map);
const writer = _writer;
var renamer = rename.NoOpRenamer.init(symbols, source);
var printer = PrinterType.init(
@@ -6033,6 +5919,7 @@ pub fn printCommonJS(
opts,
renamer.toRenamer(),
getSourceMapBuilder(if (generate_source_map) .lazy else .disable, false, opts, source, &tree),
prefers_ascii,
);
var bin_stack_heap = std.heap.stackFallback(1024, bun.default_allocator);
printer.binary_expression_stack = std.ArrayList(PrinterType.BinaryExpressionVisitor).init(bin_stack_heap.get());

View File

@@ -55,7 +55,7 @@ pub const FieldMessage = union(FieldType) {
return switch (tag) {
.severity => FieldMessage{ .severity = String.cloneUTF8(message) },
// Ignore this one for now.
// .localized_severity => FieldMessage{ .localized_severity = String.createUTF8(message) },
// .localized_severity => FieldMessage{ .localized_severity = String.cloneUTF8(message) },
.code => FieldMessage{ .code = String.cloneUTF8(message) },
.message => FieldMessage{ .message = String.cloneUTF8(message) },
.detail => FieldMessage{ .detail = String.cloneUTF8(message) },

View File

@@ -86,6 +86,17 @@ const templateStringTests: Record<string, TemplateStringTest> = {
StringAddition4: { expr: "`${1}z` + `\u2796${Number(1)}rest`", print: true },
StringAddition5: { expr: "`\u2796${1}z` + `\u2796${Number(1)}rest`", print: true },
StringAddition6: { expr: "`${1}` + '\u2796rest'", print: true },
TaggedTemplate1: { expr: "String.raw`one${'two'}three${'four'}`", print: true },
TaggedTemplate2: { expr: "String.raw`abc`", print: true },
TaggedTemplate3: { expr: "String.raw`\\n`", print: true },
TaggedTemplate4: { expr: "String.raw`\n`", print: true },
TaggedTemplate5: { expr: "String.raw`æ`", print: true },
TaggedTemplate6: { expr: "String.raw`\\xE6`", print: true },
TaggedTemplate7: { expr: 'String.raw`\xE6${"one"}`', print: true },
TaggedTemplate8: { expr: 'String.raw`\u{10334}${"two"}→`', print: true },
TaggedTemplate9: { expr: 'String.raw`\\u{10334}${"two"}→`', print: true },
RegExp1: { expr: "/æ/.source", print: true },
RegExp2: { expr: "/\xE6/.source", print: true },
};
describe("bundler", () => {

View File

@@ -1772,6 +1772,12 @@ console.log(<div {...obj} key="after" />);`),
expectPrintedMin_(`console.log("\\uD800" + "\\uDF34")`, 'console.log("\\uD800" + "\\uDF34")');
});
it("tagged template literal", () => {
expectPrinted_("String.raw`b`", "String.raw`b`");
expectPrinted_("String.raw`\\n`", "String.raw`\\n`");
expectPrinted_("String.raw`\n`", "String.raw`\n`");
});
it("fold string addition", () => {
expectPrinted_(
`

View File

@@ -291,10 +291,7 @@ describe("bunshell", () => {
test("escape unicode", async () => {
const { stdout } = await $`echo \\\\`;
// TODO: Uncomment and replace after unicode in template tags is supported
// expect(stdout.toString("utf8")).toEqual(`\弟\気\n`);
// Set this here for now, because unicode in template tags while using .raw is broken, but should be fixed
expect(stdout.toString("utf8")).toEqual("\\u5F1F\\u6C17\n");
expect(stdout.toString("utf8")).toEqual(`\\\\\n`);
});
/**

View File

@@ -70,7 +70,7 @@ test("constant-folded equals doesn't lie", async () => {
console.log("\"" === '"');
});
test.skip("template literal raw property with unicode in an ascii-only build", async () => {
test("template literal raw property with unicode in an ascii-only build", async () => {
expect(String.raw`你好𐃘\\`).toBe("你好𐃘\\\\");
expect((await $`echo 你好𐃘`.text()).trim()).toBe("你好𐃘");
});

View File

@@ -0,0 +1,34 @@
import { expect, test } from "bun:test";
test("issue #8207 - regex source string replacement with UTF-16 character", () => {
  // Mirrors what parsel-js does: read a regex literal's `.source` and patch it
  // with a plain string replacement. That only works if the non-ASCII `¶`
  // (U+00B6) appears in `.source` exactly as it was written in the literal.
  const regex = /:(?<name>[-\w\P{ASCII}]+)(?:\((?<argument>¶*)\))?/gu;

  // Swap the `¶*` placeholder inside the `argument` group for `.*`.
  const source = regex.source;
  const replaced = source.replace("(?<argument>¶*)", "(?<argument>.*)");

  // The placeholder must have been found, so the string has to change...
  expect(replaced).not.toBe(source);
  // ...and it must now contain the new group and no longer the old one.
  expect(replaced).toContain("(?<argument>.*)");
  expect(replaced).not.toContain("(?<argument>¶*)");

  // The patched source must still compile as a regular expression.
  const newRegex = new RegExp(replaced, "gu");
  expect(newRegex).toBeInstanceOf(RegExp);
});
test("issue #8207 - regex with UTF-16 character in source", () => {
  // Companion check: the regex containing the non-ASCII `¶` (U+00B6) must
  // itself match correctly, not just expose the right `.source`.
  const regex = /:(?<name>[-\w\P{ASCII}]+)(?:\((?<argument>¶*)\))?/gu;

  // With the optional `(¶...)` argument present, the whole input is one match.
  const match1 = ":test(¶¶¶)".match(regex);
  expect(match1).toBeTruthy();
  expect(match1[0]).toBe(":test(¶¶¶)");

  // Without the argument group, the bare `:name` form still matches.
  const match2 = ":name".match(regex);
  expect(match2).toBeTruthy();
  expect(match2[0]).toBe(":name");
});

View File

@@ -0,0 +1,169 @@
import { bunEnv, bunExe, tempDirWithFiles } from "harness";
// Expected program output: the UTF-8 bytes of "æ™弟気👋", written as raw
// byte values and decoded here (TextDecoder defaults to UTF-8).
// NOTE(review): presumably spelled as bytes so the expectation does not depend
// on how this test file's own string literals are handled — confirm.
const expected_stdout = new TextDecoder().decode(
  Uint8Array.of(195, 166, 226, 132, 162, 229, 188, 159, 230, 176, 151, 240, 159, 145, 139),
);

// Program under test: prints a String.raw tagged template holding non-ASCII text.
const fixture = `console.log(String.raw\`æ™弟気👋\`);`;

// 16 MiB of trailing spaces for the "requires rtc" variant.
// NOTE(review): presumably large enough to make the file eligible for the
// runtime transpiler cache — confirm against the cache's size cutoff.
const rtc_padding = " ".repeat(16 * 1024 * 1024);

// Temp directory holding both fixture variants; every test below runs inside it.
const dir = tempDirWithFiles("run directly", {
  "fixture.ts": fixture,
  "requires_rtc_fixture.ts": fixture + rtc_padding,
});
test("run directly", async () => {
  // Execute the TypeScript fixture with the bun executable, capturing all streams.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "fixture.ts"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  // Compare everything in one object so a failure shows stdout, stderr and exit code together.
  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});
test("build js then run", async () => {
  // Step 1: bundle the fixture for the bun target; build output goes to the parent's stdio.
  const build = Bun.spawnSync({
    cmd: [bunExe(), "build", "--target", "bun", "--outfile", "build/fixture.js", "fixture.ts"],
    cwd: dir,
    env: bunEnv,
    stdio: ["inherit", "inherit", "inherit"],
  });
  expect(build.exitCode).toBe(0);

  // Step 2: run the bundled file and capture what it prints.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "build/fixture.js"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});
test("build min js then run", async () => {
  // Step 1: same bundle as the non-minified test, but with --minify enabled.
  const build = Bun.spawnSync({
    cmd: [bunExe(), "build", "--target", "bun", "--minify", "--outfile", "build/fixture-min.js", "fixture.ts"],
    cwd: dir,
    env: bunEnv,
    stdio: ["inherit", "inherit", "inherit"],
  });
  expect(build.exitCode).toBe(0);

  // Step 2: run the minified bundle and capture what it prints.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "build/fixture-min.js"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});
// The exact size cutoff for the runtime transpiler cache is unclear; see
// https://github.com/oven-sh/bun/blob/b960677f5f99de7adf7b84fb8b4c8e1a97ff9e55/src/bun.js/RuntimeTranspilerCache.zig#L17
test("run directly (requires rtc)", async () => {
  // Same as "run directly", but against the padded fixture variant.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "requires_rtc_fixture.ts"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});
test("build js then run (requires rtc)", async () => {
  // Step 1: bundle the padded fixture for the bun target.
  const build_cmd = [
    bunExe(),
    "build",
    "--target",
    "bun",
    "--outfile",
    "build/requires_rtc_fixture.js",
    "requires_rtc_fixture.ts",
  ];
  const build = Bun.spawnSync({
    cmd: build_cmd,
    cwd: dir,
    env: bunEnv,
    stdio: ["inherit", "inherit", "inherit"],
  });
  expect(build.exitCode).toBe(0);

  // Step 2: run the bundled file and capture what it prints.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "build/requires_rtc_fixture.js"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});
test("build min js then run (requires rtc)", async () => {
  // Step 1: bundle the padded fixture for the bun target with --minify enabled.
  const build_cmd = [
    bunExe(),
    "build",
    "--target",
    "bun",
    "--minify",
    "--outfile",
    "build/requires_rtc_fixture-min.js",
    "requires_rtc_fixture.ts",
  ];
  const build = Bun.spawnSync({
    cmd: build_cmd,
    cwd: dir,
    env: bunEnv,
    stdio: ["inherit", "inherit", "inherit"],
  });
  expect(build.exitCode).toBe(0);

  // Step 2: run the minified bundle and capture what it prints.
  const { stdout, stderr, exitCode } = Bun.spawnSync({
    cmd: [bunExe(), "build/requires_rtc_fixture-min.js"],
    cwd: dir,
    env: bunEnv,
    stdio: ["pipe", "pipe", "pipe"],
  });

  const actual = {
    stdout: stdout.toString().trim(),
    stderr: stderr.toString().trim(),
    exitCode,
  };
  expect(actual).toEqual({
    stdout: expected_stdout,
    stderr: "",
    exitCode: 0,
  });
});