From d37daeb76ad078d4a2254419cfebbc0602a50670 Mon Sep 17 00:00:00 2001
From: Justin Whear
Date: Fri, 17 Feb 2023 14:48:10 -0800
Subject: [PATCH] Fix #2005 (#2096)

---
 src/js_printer.zig             | 49 +++++++++++++++++++++++++++++++++-
 test/bun.js/repro_2005.test.js | 12 +++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 test/bun.js/repro_2005.test.js

diff --git a/src/js_printer.zig b/src/js_printer.zig
index 48e4c62600..ce4138cd4e 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -2629,7 +2629,54 @@ pub fn NewPrinter(
                         p.print(" ");
                     }
 
-                    p.print(e.value);
+                    if (comptime is_bun_platform) {
+                        // Translate each non-ASCII code point to a \uXXXX (up to 0xFFFF) or \u{...} escape; runs of ASCII are printed unchanged
+                        var ascii_start: usize = 0;
+                        var is_ascii = false;
+                        var iter = CodepointIterator.init(e.value);
+                        var cursor = CodepointIterator.Cursor{};
+                        while (iter.next(&cursor)) {
+                            switch (cursor.c) {
+                                first_ascii...last_ascii => {
+                                    if (!is_ascii) {
+                                        ascii_start = cursor.i;
+                                        is_ascii = true;
+                                    }
+                                },
+                                else => {
+                                    if (is_ascii) {
+                                        p.print(e.value[ascii_start..cursor.i]);
+                                        is_ascii = false;
+                                    }
+
+                                    switch (cursor.c) {
+                                        0...0xFFFF => {
+                                            p.print([_]u8{
+                                                '\\',
+                                                'u',
+                                                hex_chars[cursor.c >> 12],
+                                                hex_chars[(cursor.c >> 8) & 15],
+                                                hex_chars[(cursor.c >> 4) & 15],
+                                                hex_chars[cursor.c & 15],
+                                            });
+                                        },
+                                        else => {
+                                            p.print("\\u{");
+                                            std.fmt.formatInt(cursor.c, 16, .lower, .{}, p) catch unreachable;
+                                            p.print("}");
+                                        },
+                                    }
+                                },
+                            }
+                        }
+
+                        if (is_ascii) {
+                            p.print(e.value[ascii_start..]);
+                        }
+                    } else {
+                        // Not the Bun platform: the UTF-8 sequence is fine, so print the pattern unchanged
+                        p.print(e.value);
+                    }
 
                     // Need a space before the next identifier to avoid it turning into flags
                     p.prev_reg_exp_end = p.writer.written;
diff --git a/test/bun.js/repro_2005.test.js b/test/bun.js/repro_2005.test.js
new file mode 100644
index 0000000000..bd80ab7dd9
--- /dev/null
+++ b/test/bun.js/repro_2005.test.js
@@ -0,0 +1,12 @@
+import { it, expect } from "bun:test";
+
+it("regex literal with non-Latin1 should work", () => {
+  const text = "这是一段要替换的文字";
+
+  // Building the RegExp from a string gives the correct result: 这是一段的文字
+  expect(text.replace(new RegExp("要替换"), "")).toBe("这是一段的文字");
+
+  // Regex-literal form. Incorrect result (text left unreplaced; presumably the behavior reported in #2005): 这是一段要替换的文字
+  expect(text.replace(/要替换/, "")).toBe("这是一段的文字");
+
+});