Compare commits

..

2 Commits

Author SHA1 Message Date
Claude Bot
918845e4b0 fix(repl): validate UTF-8 sequences and harden against malformed input
- Validate continuation bytes (10xxxxxx) in readKey before accepting
  multi-byte sequences, rejecting malformed UTF-8 at input time
- charLenAt: validate buffer bounds and continuation bytes, returning 1
  for invalid/truncated sequences to ensure forward progress
- charLenBefore: cap backward scan to 4 bytes and validate that the
  start byte's expected length matches, returning 1 on mismatch
- Remove unused Multibyte.fromLen helper
- Make test assertion more specific to avoid matching unrelated digits

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 18:28:08 +00:00
Claude Bot
2386998dbc fix(repl): support Unicode/CJK character input in REPL
The REPL's key input pipeline only recognized ASCII bytes (32-126),
silently dropping all multi-byte UTF-8 characters (Chinese, Japanese,
Korean, accented Latin, emoji, etc.). This adds proper UTF-8 support:

- Add `multibyte` variant to Key type for 2-4 byte UTF-8 sequences
- Detect UTF-8 lead bytes in readKey and accumulate full sequences
- Make LineEditor cursor movement, backspace, delete, and word
  operations UTF-8-aware (operating on character boundaries)
- Use display width (via visibleCodepointWidth) for cursor positioning
  so CJK double-width characters align correctly

Closes #27556

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 18:14:16 +00:00
10 changed files with 251 additions and 405 deletions

View File

@@ -4,7 +4,7 @@ register_repository(
REPOSITORY
cloudflare/lol-html
COMMIT
e3aa54798602dd27250fafde1b5a66f080046252
e9e16dca48dd4a8ffbc77642bc4be60407585f11
)
set(LOLHTML_CWD ${VENDOR_PATH}/lolhtml/c-api)

View File

@@ -36,7 +36,7 @@ namespace uWS {
constexpr uint64_t STATE_IS_ERROR = ~0ull;//0xFFFFFFFFFFFFFFFF;
constexpr uint64_t STATE_SIZE_OVERFLOW = 0x0Full << (sizeof(uint64_t) * 8 - 8);//0x0F00000000000000;
inline uint64_t chunkSize(uint64_t state) {
inline unsigned int chunkSize(uint64_t state) {
return state & STATE_SIZE_MASK;
}
@@ -139,7 +139,7 @@ namespace uWS {
// short read
}
inline void decChunkSize(uint64_t &state, uint64_t by) {
inline void decChunkSize(uint64_t &state, unsigned int by) {
//unsigned int bits = state & STATE_IS_CHUNKED;
@@ -208,7 +208,7 @@ namespace uWS {
}
// do we have data to emit all?
uint64_t remaining = chunkSize(state);
unsigned int remaining = chunkSize(state);
if (data.length() >= remaining) {
// emit all but 2 bytes then reset state to 0 and goto beginning
// not fin
@@ -248,7 +248,7 @@ namespace uWS {
} else {
/* We will consume all our input data */
std::string_view emitSoon;
uint64_t size = chunkSize(state);
unsigned int size = chunkSize(state);
size_t len = data.length();
if (size > 2) {
uint64_t maximalAppEmit = size - 2;
@@ -284,7 +284,7 @@ namespace uWS {
return std::nullopt;
}
}
decChunkSize(state, (uint64_t) len);
decChunkSize(state, (unsigned int) len);
state |= STATE_IS_CHUNKED;
data.remove_prefix(len);
if (emitSoon.length()) {

View File

@@ -1219,10 +1219,7 @@ fn runWithSourceCode(
opts.features.minify_keep_names = transpiler.options.keep_names;
opts.features.minify_whitespace = transpiler.options.minify_whitespace;
opts.features.emit_decorator_metadata = task.emit_decorator_metadata;
// emitDecoratorMetadata implies legacy/experimental decorators, as it only
// makes sense with TypeScript's legacy decorator system (reflect-metadata).
// TC39 standard decorators have their own metadata mechanism.
opts.features.standard_decorators = !loader.isTypeScript() or !(task.experimental_decorators or task.emit_decorator_metadata);
opts.features.standard_decorators = !loader.isTypeScript() or !task.experimental_decorators;
opts.features.unwrap_commonjs_packages = transpiler.options.unwrap_commonjs_packages;
opts.features.bundler_feature_flags = transpiler.options.bundler_feature_flags;
opts.features.hot_module_reloading = output_format == .internal_bake_dev and !source.index.isRuntime();

View File

@@ -120,9 +120,17 @@ const Key = union(enum) {
// Regular printable character
char: u8,
// Multi-byte UTF-8 character (2-4 bytes)
multibyte: Multibyte,
// Unknown/unhandled
unknown,
const Multibyte = struct {
bytes: [4]u8,
len: u3,
};
pub fn fromByte(byte: u8) Key {
return switch (byte) {
1 => .ctrl_a,
@@ -319,7 +327,7 @@ const History = struct {
const LineEditor = struct {
buffer: ArrayList(u8),
cursor: usize = 0,
cursor: usize = 0, // byte position in buffer
allocator: Allocator,
pub fn init(allocator: Allocator) LineEditor {
@@ -362,21 +370,66 @@ const LineEditor = struct {
self.cursor += slice.len;
}
/// Returns the byte length of the UTF-8 character at the given byte position.
/// Returns 1 for invalid/truncated sequences to ensure forward progress.
fn charLenAt(self: *const LineEditor, pos: usize) usize {
if (pos >= self.buffer.items.len) return 0;
const seq_len = strings.codepointSize(u8, self.buffer.items[pos]);
if (seq_len < 2) return 1; // ASCII or invalid lead byte
const len: usize = @as(usize, seq_len);
// Validate: enough bytes remain and all continuation bytes are 10xxxxxx
if (pos + len > self.buffer.items.len) return 1;
for (1..len) |i| {
if (self.buffer.items[pos + i] & 0xC0 != 0x80) return 1;
}
return len;
}
/// Returns the byte length of the UTF-8 character ending at or before the given byte position.
/// Returns 1 for invalid sequences to ensure backward progress.
fn charLenBefore(self: *const LineEditor, pos: usize) usize {
if (pos == 0) return 0;
// Walk backward over continuation bytes (10xxxxxx), up to 3 continuation bytes
var i = pos;
const limit = pos -| 4; // don't scan more than 4 bytes back
while (i > limit) {
i -= 1;
if (self.buffer.items[i] & 0xC0 != 0x80) {
// Found a start byte; validate the sequence length matches
const expected_len = strings.codepointSize(u8, self.buffer.items[i]);
if (expected_len >= 2 and @as(usize, expected_len) == pos - i) {
return pos - i;
}
// Mismatch: treat as single byte
return 1;
}
}
return 1; // fallback: step back one byte
}
pub fn deleteChar(self: *LineEditor) void {
if (self.cursor < self.buffer.items.len) {
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenAt(self.cursor);
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn backspace(self: *LineEditor) void {
if (self.cursor > 0) {
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenBefore(self.cursor);
self.cursor -= char_len;
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn deleteWord(self: *LineEditor) void {
// Delete word forward
// Delete word forward — skip whitespace, then non-whitespace
while (self.cursor < self.buffer.items.len and
std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
@@ -385,23 +438,31 @@ const LineEditor = struct {
while (self.cursor < self.buffer.items.len and
!std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenAt(self.cursor);
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn backspaceWord(self: *LineEditor) void {
// Delete word backward
// Delete word backward — skip whitespace, then non-whitespace
while (self.cursor > 0 and
std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
}
while (self.cursor > 0 and
!std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
while (self.cursor > 0) {
const start = self.prevCharStart();
if (std.ascii.isWhitespace(self.buffer.items[start])) break;
const char_len = self.cursor - start;
self.cursor = start;
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
@@ -419,34 +480,36 @@ const LineEditor = struct {
pub fn moveLeft(self: *LineEditor) void {
if (self.cursor > 0) {
self.cursor -= 1;
self.cursor -= self.charLenBefore(self.cursor);
}
}
pub fn moveRight(self: *LineEditor) void {
if (self.cursor < self.buffer.items.len) {
self.cursor += 1;
self.cursor += self.charLenAt(self.cursor);
}
}
pub fn moveWordLeft(self: *LineEditor) void {
// Skip whitespace, then skip non-whitespace
while (self.cursor > 0 and
std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
}
while (self.cursor > 0 and
!std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
!std.ascii.isWhitespace(self.buffer.items[self.prevCharStart()]))
{
self.cursor -= 1;
self.cursor -= self.charLenBefore(self.cursor);
}
}
pub fn moveWordRight(self: *LineEditor) void {
// Skip non-whitespace, then skip whitespace
while (self.cursor < self.buffer.items.len and
!std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
self.cursor += 1;
self.cursor += self.charLenAt(self.cursor);
}
while (self.cursor < self.buffer.items.len and
std.ascii.isWhitespace(self.buffer.items[self.cursor]))
@@ -455,6 +518,12 @@ const LineEditor = struct {
}
}
/// Returns the byte offset of the start of the character before cursor.
fn prevCharStart(self: *const LineEditor) usize {
if (self.cursor == 0) return 0;
return self.cursor - self.charLenBefore(self.cursor);
}
pub fn moveToStart(self: *LineEditor) void {
self.cursor = 0;
}
@@ -464,18 +533,66 @@ const LineEditor = struct {
}
pub fn swap(self: *LineEditor) void {
// Swap operates on the two UTF-8 characters around the cursor.
if (self.cursor > 0 and self.cursor < self.buffer.items.len) {
const temp = self.buffer.items[self.cursor - 1];
self.buffer.items[self.cursor - 1] = self.buffer.items[self.cursor];
self.buffer.items[self.cursor] = temp;
self.cursor += 1;
} else if (self.cursor > 1 and self.cursor == self.buffer.items.len) {
const temp = self.buffer.items[self.cursor - 2];
self.buffer.items[self.cursor - 2] = self.buffer.items[self.cursor - 1];
self.buffer.items[self.cursor - 1] = temp;
const left_len = self.charLenBefore(self.cursor);
const right_len = self.charLenAt(self.cursor);
const left_start = self.cursor - left_len;
const right_end = self.cursor + right_len;
if (right_end <= self.buffer.items.len) {
// Copy left char to temp
var tmp: [4]u8 = undefined;
@memcpy(tmp[0..left_len], self.buffer.items[left_start..self.cursor]);
// Shift right char into left position
std.mem.copyForwards(u8, self.buffer.items[left_start..], self.buffer.items[self.cursor..right_end]);
// Copy temp (left char) after right char
@memcpy(self.buffer.items[left_start + right_len ..][0..left_len], tmp[0..left_len]);
self.cursor = right_end;
}
} else if (self.cursor > 0 and self.cursor == self.buffer.items.len) {
// At end of line: swap the two characters before cursor
const right_len = self.charLenBefore(self.cursor);
const right_start = self.cursor - right_len;
if (right_start > 0) {
const left_len = self.charLenBefore(right_start);
const left_start = right_start - left_len;
var tmp: [4]u8 = undefined;
@memcpy(tmp[0..left_len], self.buffer.items[left_start..right_start]);
std.mem.copyForwards(u8, self.buffer.items[left_start..], self.buffer.items[right_start..self.cursor]);
@memcpy(self.buffer.items[left_start + right_len ..][0..left_len], tmp[0..left_len]);
}
}
}
/// Calculate display width of buffer content up to the given byte position.
pub fn displayWidth(self: *const LineEditor, end_pos: usize) usize {
var width: usize = 0;
var pos: usize = 0;
const buf = self.buffer.items;
const limit = @min(end_pos, buf.len);
while (pos < limit) {
const byte_len = strings.codepointSize(u8, buf[pos]);
if (byte_len == 0 or pos + byte_len > buf.len) {
// Invalid UTF-8 or truncated: treat as 1-wide
width += 1;
pos += 1;
} else if (byte_len == 1) {
width += 1;
pos += 1;
} else {
// Pad to 4 bytes for decodeWTF8RuneT
var tmp: [4]u8 = .{ 0, 0, 0, 0 };
for (0..@as(usize, byte_len)) |i| {
tmp[i] = buf[pos + i];
}
const cp = strings.decodeWTF8RuneT(&tmp, byte_len, u32, 0xFFFD);
width += @as(usize, strings.visibleCodepointWidth(cp, false));
pos += @as(usize, byte_len);
}
}
return width;
}
pub fn getLine(self: *const LineEditor) []const u8 {
return self.buffer.items;
}
@@ -922,6 +1039,20 @@ fn readKey(self: *Repl) ?Key {
return .escape;
}
// Handle UTF-8 multi-byte sequences
const seq_len = strings.codepointSize(u8, byte);
if (seq_len >= 2 and seq_len <= 4) {
const len: u3 = @intCast(seq_len);
var mb = Key.Multibyte{ .bytes = .{ byte, 0, 0, 0 }, .len = len };
for (1..seq_len) |i| {
const cont = self.readByte() orelse return .unknown;
// Validate continuation byte (must be 10xxxxxx)
if (cont & 0xC0 != 0x80) return .unknown;
mb.bytes[i] = cont;
}
return .{ .multibyte = mb };
}
return Key.fromByte(byte);
}
@@ -974,8 +1105,9 @@ fn refreshLine(self: *Repl) void {
self.write(line);
}
// Position cursor
const cursor_pos = prompt_len + self.line_editor.cursor;
// Position cursor using display width (not byte count)
const cursor_display_width = self.line_editor.displayWidth(self.line_editor.cursor);
const cursor_pos = prompt_len + cursor_display_width;
if (cursor_pos < self.terminal_width) {
self.write("\r");
if (cursor_pos > 0) {
@@ -1781,6 +1913,10 @@ pub fn runWithVM(self: *Repl, vm: ?*jsc.VirtualMachine) !void {
self.line_editor.insert(c) catch {};
self.refreshLine();
},
.multibyte => |mb| {
self.line_editor.insertSlice(mb.bytes[0..mb.len]) catch {};
self.refreshLine();
},
else => {},
}
}

View File

@@ -1103,10 +1103,7 @@ pub const Transpiler = struct {
var opts = js_parser.Parser.Options.init(jsx, loader);
opts.features.emit_decorator_metadata = this_parse.emit_decorator_metadata;
// emitDecoratorMetadata implies legacy/experimental decorators, as it only
// makes sense with TypeScript's legacy decorator system (reflect-metadata).
// TC39 standard decorators have their own metadata mechanism.
opts.features.standard_decorators = !loader.isTypeScript() or !(this_parse.experimental_decorators or this_parse.emit_decorator_metadata);
opts.features.standard_decorators = !loader.isTypeScript() or !this_parse.experimental_decorators;
opts.features.allow_runtime = transpiler.options.allow_runtime;
opts.features.set_breakpoint_on_first_line = this_parse.set_breakpoint_on_first_line;
opts.features.trim_unused_imports = transpiler.options.trim_unused_imports orelse loader.isTypeScript();

View File

@@ -976,10 +976,7 @@ pub const FormData = struct {
}
pub const Field = struct {
/// Raw slice into the input buffer. Not using `bun.Semver.String` because
/// file bodies are binary data that can contain null bytes, which
/// Semver.String's inline storage treats as terminators.
value: []const u8 = "",
value: bun.Semver.String = .{},
filename: bun.Semver.String = .{},
content_type: bun.Semver.String = .{},
is_file: bool = false,
@@ -1091,7 +1088,7 @@ pub const FormData = struct {
form: *jsc.DOMFormData,
pub fn onEntry(wrap: *@This(), name: bun.Semver.String, field: Field, buf: []const u8) void {
const value_str = field.value;
const value_str = field.value.slice(buf);
var key = jsc.ZigString.initUTF8(name.slice(buf));
if (field.is_file) {
@@ -1281,7 +1278,7 @@ pub const FormData = struct {
if (strings.endsWithComptime(body, "\r\n")) {
body = body[0 .. body.len - 2];
}
field.value = body;
field.value = subslicer.sub(body).value();
field.filename = filename orelse .{};
field.is_file = is_file;

View File

@@ -561,152 +561,6 @@ describe("SPILL.TERM - invalid chunk terminators", () => {
});
});
describe("chunked encoding size hardening", () => {
test("rejects extremely large chunk size hex values", async () => {
// Chunk sizes with many hex digits should be rejected by the overflow check.
// 'FFFFFFFFFFFFFFFF' sets bits in the overflow-detection region (bits 56-59),
// so the parser must return an error.
let bodyReadSucceeded = false;
await using server = Bun.serve({
port: 0,
async fetch(req) {
try {
await req.text();
bodyReadSucceeded = true;
} catch {
// Expected to fail
}
return new Response("OK");
},
});
const client = net.connect(server.port, "127.0.0.1");
// 16 hex digits all 'F' — sets overflow bits and must be rejected
const maliciousRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"FFFFFFFFFFFFFFFF\r\n" +
"data\r\n" +
"0\r\n" +
"\r\n";
await new Promise<void>(resolve => {
let responseData = "";
client.on("error", () => resolve());
client.on("data", data => {
responseData += data.toString();
});
client.on("close", () => {
expect(responseData).toContain("HTTP/1.1 400");
expect(bodyReadSucceeded).toBe(false);
resolve();
});
client.write(maliciousRequest);
});
});
test("large chunk size exceeding 32 bits does not produce empty body", async () => {
// '100000000' hex = 2^32 (4294967296). If the chunk size were truncated
// to 32 bits, this would become 0, and the +2 for CRLF would make it
// look like the end-of-chunks marker (size=2), producing an empty body.
// With correct 64-bit handling, the parser treats this as a large
// pending chunk — the body read should fail when we close the connection,
// because the server is still expecting ~4GB of data.
let receivedBody: string | null = null;
let bodyError = false;
const { promise: headersReceived, resolve: onHeadersReceived } = Promise.withResolvers<void>();
const { promise: bodyHandled, resolve: bodyDone } = Promise.withResolvers<void>();
await using server = Bun.serve({
port: 0,
async fetch(req) {
// Signal that headers have been parsed and the fetch handler entered
onHeadersReceived();
try {
receivedBody = await req.text();
} catch {
bodyError = true;
}
bodyDone();
return new Response("OK");
},
});
const client = net.connect(server.port, "127.0.0.1");
// Send the chunk header claiming 4GB of data, followed by a few bytes,
// then close the connection.
const maliciousRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"100000000\r\n" +
"AAAA\r\n";
client.write(maliciousRequest);
// Wait until the server has parsed headers and entered the fetch handler,
// then close the connection to trigger the body error (since we won't send 4GB).
await headersReceived;
client.end();
await bodyHandled;
// With correct 64-bit handling, the body read must fail because we
// disconnected before sending 4GB of chunk data.
// With truncation to 32-bit zero, the body would be "" with no error.
expect(bodyError).toBe(true);
expect(receivedBody).toBeNull();
});
test("accepts valid chunk sizes within normal range", async () => {
// Normal-sized chunks should still work fine
let receivedBody = "";
await using server = Bun.serve({
port: 0,
async fetch(req) {
receivedBody = await req.text();
return new Response("Success");
},
});
const client = net.connect(server.port, "127.0.0.1");
// Use hex chunk sizes that are perfectly valid
const validRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"a\r\n" + // 10 bytes
"0123456789\r\n" +
"FF\r\n" + // 255 bytes
Buffer.alloc(255, "A").toString() +
"\r\n" +
"0\r\n" +
"\r\n";
await new Promise<void>((resolve, reject) => {
client.on("error", reject);
client.on("data", data => {
const response = data.toString();
expect(response).toContain("HTTP/1.1 200");
expect(receivedBody).toBe("0123456789" + Buffer.alloc(255, "A").toString());
client.end();
resolve();
});
client.write(validRequest);
});
});
});
// Tests for strict RFC 7230 HEXDIG validation in chunk size parsing.
// Chunk sizes must only contain characters from the set [0-9a-fA-F].
// Non-HEXDIG characters must be rejected to ensure consistent parsing

View File

@@ -1,120 +0,0 @@
import { expect, test } from "bun:test";
// https://github.com/oven-sh/bun/issues/27478
// Request.formData() truncates small binary files at first null byte
test("multipart formdata preserves null bytes in small binary files", async () => {
const boundary = "----bun-null-byte-boundary";
const source = Buffer.from([0x1f, 0x8b, 0x08, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="test.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual(Array.from(source));
expect(parsed.byteLength).toBe(source.byteLength);
});
test("multipart formdata preserves files that are all null bytes", async () => {
const boundary = "----bun-test-boundary";
const source = Buffer.from([0x00, 0x00, 0x00, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="zeros.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x00, 0x00, 0x00, 0x00]);
expect(parsed.byteLength).toBe(4);
});
test("multipart formdata preserves single null byte file", async () => {
const boundary = "----bun-test-boundary";
const source = Buffer.from([0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="null.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x00]);
expect(parsed.byteLength).toBe(1);
});
test("multipart formdata preserves 8-byte binary with embedded nulls", async () => {
const boundary = "----bun-test-boundary";
// Exactly 8 bytes (max inline length of Semver.String) with nulls interspersed
const source = Buffer.from([0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="mixed.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00]);
expect(parsed.byteLength).toBe(8);
});

View File

@@ -1,92 +0,0 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
// When emitDecoratorMetadata is true in tsconfig but experimentalDecorators is
// absent, Bun should use legacy decorator semantics (not TC39 standard).
// emitDecoratorMetadata only makes sense with legacy decorators.
test("legacy decorators work when emitDecoratorMetadata is true without experimentalDecorators", async () => {
using dir = tempDir("issue-27526", {
"tsconfig.json": JSON.stringify({
compilerOptions: {
target: "ES2021",
module: "commonjs",
strict: true,
esModuleInterop: true,
emitDecoratorMetadata: true,
},
}),
"index.ts": `
function MyDecorator(target: any, key: string, descriptor: PropertyDescriptor) {
const original = descriptor.value;
descriptor.value = function(...args: any[]) {
return "decorated:" + original.apply(this, args);
};
}
class Foo {
@MyDecorator
hello() {
return "world";
}
}
console.log(new Foo().hello());
`,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout.trim()).toBe("decorated:world");
expect(exitCode).toBe(0);
});
// When neither emitDecoratorMetadata nor experimentalDecorators is set,
// TypeScript files should use TC39 standard decorators.
test("TC39 standard decorators work when neither emitDecoratorMetadata nor experimentalDecorators is set", async () => {
using dir = tempDir("issue-27526-standard", {
"tsconfig.json": JSON.stringify({
compilerOptions: {
target: "ES2021",
module: "commonjs",
strict: true,
},
}),
"index.ts": `
function MyDecorator(value: Function, context: ClassMethodDecoratorContext) {
return function(this: any, ...args: any[]) {
return "decorated:" + (value as any).apply(this, args);
};
}
class Foo {
@MyDecorator
hello() {
return "world";
}
}
console.log(new Foo().hello());
`,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout.trim()).toBe("decorated:world");
expect(exitCode).toBe(0);
});

View File

@@ -0,0 +1,77 @@
import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe } from "harness";
/**
 * Spawn a `bun repl` subprocess, feed it `input` on stdin (an array is
 * joined with newlines and terminated with a trailing newline), and collect
 * its output.
 *
 * stdout/stderr are drained concurrently with waiting for exit: reading the
 * pipes only after `proc.exited` resolves can deadlock when the child writes
 * more than the OS pipe buffer before exiting. This also matches the
 * Promise.all pattern used by the other spawn-based tests in this change.
 *
 * TERM=dumb and NO_COLOR=1 keep the REPL output free of color/cursor escape
 * sequences so assertions can match plain text.
 */
async function runRepl(input: string | string[]): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const inputStr = Array.isArray(input) ? input.join("\n") + "\n" : input;
  await using proc = Bun.spawn({
    cmd: [bunExe(), "repl"],
    stdin: Buffer.from(inputStr),
    stdout: "pipe",
    stderr: "pipe",
    env: {
      ...bunEnv,
      TERM: "dumb",
      NO_COLOR: "1",
    },
  });
  // Drain output while awaiting exit to avoid a pipe-buffer deadlock.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  return { stdout, stderr, exitCode };
}
const stripAnsi = Bun.stripANSI;
describe("REPL Unicode support (#27556)", () => {
test("evaluates Chinese characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("你好世界")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("你好世界");
expect(exitCode).toBe(0);
});
test("evaluates Japanese characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("こんにちは")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("こんにちは");
expect(exitCode).toBe(0);
});
test("evaluates Korean characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("안녕하세요")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("안녕하세요");
expect(exitCode).toBe(0);
});
test("evaluates accented Latin characters", async () => {
const { stdout, exitCode } = await runRepl(['console.log("café résumé")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("café résumé");
expect(exitCode).toBe(0);
});
test("evaluates emoji characters", async () => {
const { stdout, exitCode } = await runRepl(['console.log("🎉🚀")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("🎉🚀");
expect(exitCode).toBe(0);
});
test("Unicode string concatenation works", async () => {
const { stdout, exitCode } = await runRepl(['"你好" + " " + "世界"', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("你好 世界");
expect(exitCode).toBe(0);
});
test("Unicode string length is correct", async () => {
const { stdout, exitCode } = await runRepl(['"__LEN__" + "你好".length', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("__LEN__2");
expect(exitCode).toBe(0);
});
});