Compare commits

..

2 Commits

Author SHA1 Message Date
Claude Bot
918845e4b0 fix(repl): validate UTF-8 sequences and harden against malformed input
- Validate continuation bytes (10xxxxxx) in readKey before accepting
  multi-byte sequences, rejecting malformed UTF-8 at input time
- charLenAt: validate buffer bounds and continuation bytes, returning 1
  for invalid/truncated sequences to ensure forward progress
- charLenBefore: cap backward scan to 4 bytes and validate that the
  start byte's expected length matches, returning 1 on mismatch
- Remove unused Multibyte.fromLen helper
- Make test assertion more specific to avoid matching unrelated digits

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 18:28:08 +00:00
Claude Bot
2386998dbc fix(repl): support Unicode/CJK character input in REPL
The REPL's key input pipeline only recognized ASCII bytes (32-126),
silently dropping all multi-byte UTF-8 characters (Chinese, Japanese,
Korean, accented Latin, emoji, etc.). This adds proper UTF-8 support:

- Add `multibyte` variant to Key type for 2-4 byte UTF-8 sequences
- Detect UTF-8 lead bytes in readKey and accumulate full sequences
- Make LineEditor cursor movement, backspace, delete, and word
  operations UTF-8-aware (operating on character boundaries)
- Use display width (via visibleCodepointWidth) for cursor positioning
  so CJK double-width characters align correctly

Closes #27556

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 18:14:16 +00:00
10 changed files with 251 additions and 405 deletions

View File

@@ -4,7 +4,7 @@ register_repository(
REPOSITORY
cloudflare/lol-html
COMMIT
e3aa54798602dd27250fafde1b5a66f080046252
e9e16dca48dd4a8ffbc77642bc4be60407585f11
)
set(LOLHTML_CWD ${VENDOR_PATH}/lolhtml/c-api)

View File

@@ -36,7 +36,7 @@ namespace uWS {
constexpr uint64_t STATE_IS_ERROR = ~0ull;//0xFFFFFFFFFFFFFFFF;
constexpr uint64_t STATE_SIZE_OVERFLOW = 0x0Full << (sizeof(uint64_t) * 8 - 8);//0x0F00000000000000;
inline uint64_t chunkSize(uint64_t state) {
inline unsigned int chunkSize(uint64_t state) {
return state & STATE_SIZE_MASK;
}
@@ -139,7 +139,7 @@ namespace uWS {
// short read
}
inline void decChunkSize(uint64_t &state, uint64_t by) {
inline void decChunkSize(uint64_t &state, unsigned int by) {
//unsigned int bits = state & STATE_IS_CHUNKED;
@@ -208,7 +208,7 @@ namespace uWS {
}
// do we have data to emit all?
uint64_t remaining = chunkSize(state);
unsigned int remaining = chunkSize(state);
if (data.length() >= remaining) {
// emit all but 2 bytes then reset state to 0 and goto beginning
// not fin
@@ -248,7 +248,7 @@ namespace uWS {
} else {
/* We will consume all our input data */
std::string_view emitSoon;
uint64_t size = chunkSize(state);
unsigned int size = chunkSize(state);
size_t len = data.length();
if (size > 2) {
uint64_t maximalAppEmit = size - 2;
@@ -284,7 +284,7 @@ namespace uWS {
return std::nullopt;
}
}
decChunkSize(state, (uint64_t) len);
decChunkSize(state, (unsigned int) len);
state |= STATE_IS_CHUNKED;
data.remove_prefix(len);
if (emitSoon.length()) {

View File

@@ -1219,10 +1219,7 @@ fn runWithSourceCode(
opts.features.minify_keep_names = transpiler.options.keep_names;
opts.features.minify_whitespace = transpiler.options.minify_whitespace;
opts.features.emit_decorator_metadata = task.emit_decorator_metadata;
// emitDecoratorMetadata implies legacy/experimental decorators, as it only
// makes sense with TypeScript's legacy decorator system (reflect-metadata).
// TC39 standard decorators have their own metadata mechanism.
opts.features.standard_decorators = !loader.isTypeScript() or !(task.experimental_decorators or task.emit_decorator_metadata);
opts.features.standard_decorators = !loader.isTypeScript() or !task.experimental_decorators;
opts.features.unwrap_commonjs_packages = transpiler.options.unwrap_commonjs_packages;
opts.features.bundler_feature_flags = transpiler.options.bundler_feature_flags;
opts.features.hot_module_reloading = output_format == .internal_bake_dev and !source.index.isRuntime();

View File

@@ -120,9 +120,17 @@ const Key = union(enum) {
// Regular printable character
char: u8,
// Multi-byte UTF-8 character (2-4 bytes)
multibyte: Multibyte,
// Unknown/unhandled
unknown,
const Multibyte = struct {
bytes: [4]u8,
len: u3,
};
pub fn fromByte(byte: u8) Key {
return switch (byte) {
1 => .ctrl_a,
@@ -319,7 +327,7 @@ const History = struct {
const LineEditor = struct {
buffer: ArrayList(u8),
cursor: usize = 0,
cursor: usize = 0, // byte position in buffer
allocator: Allocator,
pub fn init(allocator: Allocator) LineEditor {
@@ -362,21 +370,66 @@ const LineEditor = struct {
self.cursor += slice.len;
}
/// Returns the byte length of the UTF-8 character at the given byte position.
/// Returns 1 for invalid/truncated sequences to ensure forward progress.
fn charLenAt(self: *const LineEditor, pos: usize) usize {
if (pos >= self.buffer.items.len) return 0;
const seq_len = strings.codepointSize(u8, self.buffer.items[pos]);
if (seq_len < 2) return 1; // ASCII or invalid lead byte
const len: usize = @as(usize, seq_len);
// Validate: enough bytes remain and all continuation bytes are 10xxxxxx
if (pos + len > self.buffer.items.len) return 1;
for (1..len) |i| {
if (self.buffer.items[pos + i] & 0xC0 != 0x80) return 1;
}
return len;
}
/// Returns the byte length of the UTF-8 character ending at or before the given byte position.
/// Returns 1 for invalid sequences to ensure backward progress.
fn charLenBefore(self: *const LineEditor, pos: usize) usize {
if (pos == 0) return 0;
// Walk backward over continuation bytes (10xxxxxx), up to 3 continuation bytes
var i = pos;
const limit = pos -| 4; // don't scan more than 4 bytes back
while (i > limit) {
i -= 1;
if (self.buffer.items[i] & 0xC0 != 0x80) {
// Found a start byte; validate the sequence length matches
const expected_len = strings.codepointSize(u8, self.buffer.items[i]);
if (expected_len >= 2 and @as(usize, expected_len) == pos - i) {
return pos - i;
}
// Mismatch: treat as single byte
return 1;
}
}
return 1; // fallback: step back one byte
}
pub fn deleteChar(self: *LineEditor) void {
if (self.cursor < self.buffer.items.len) {
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenAt(self.cursor);
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn backspace(self: *LineEditor) void {
if (self.cursor > 0) {
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenBefore(self.cursor);
self.cursor -= char_len;
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn deleteWord(self: *LineEditor) void {
// Delete word forward
// Delete word forward — skip whitespace, then non-whitespace
while (self.cursor < self.buffer.items.len and
std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
@@ -385,23 +438,31 @@ const LineEditor = struct {
while (self.cursor < self.buffer.items.len and
!std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
_ = self.buffer.orderedRemove(self.cursor);
const char_len = self.charLenAt(self.cursor);
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
pub fn backspaceWord(self: *LineEditor) void {
// Delete word backward
// Delete word backward — skip whitespace, then non-whitespace
while (self.cursor > 0 and
std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
}
while (self.cursor > 0 and
!std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
_ = self.buffer.orderedRemove(self.cursor);
while (self.cursor > 0) {
const start = self.prevCharStart();
if (std.ascii.isWhitespace(self.buffer.items[start])) break;
const char_len = self.cursor - start;
self.cursor = start;
var i: usize = 0;
while (i < char_len and self.cursor < self.buffer.items.len) : (i += 1) {
_ = self.buffer.orderedRemove(self.cursor);
}
}
}
@@ -419,34 +480,36 @@ const LineEditor = struct {
pub fn moveLeft(self: *LineEditor) void {
if (self.cursor > 0) {
self.cursor -= 1;
self.cursor -= self.charLenBefore(self.cursor);
}
}
pub fn moveRight(self: *LineEditor) void {
if (self.cursor < self.buffer.items.len) {
self.cursor += 1;
self.cursor += self.charLenAt(self.cursor);
}
}
pub fn moveWordLeft(self: *LineEditor) void {
// Skip whitespace, then skip non-whitespace
while (self.cursor > 0 and
std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
{
self.cursor -= 1;
}
while (self.cursor > 0 and
!std.ascii.isWhitespace(self.buffer.items[self.cursor - 1]))
!std.ascii.isWhitespace(self.buffer.items[self.prevCharStart()]))
{
self.cursor -= 1;
self.cursor -= self.charLenBefore(self.cursor);
}
}
pub fn moveWordRight(self: *LineEditor) void {
// Skip non-whitespace, then skip whitespace
while (self.cursor < self.buffer.items.len and
!std.ascii.isWhitespace(self.buffer.items[self.cursor]))
{
self.cursor += 1;
self.cursor += self.charLenAt(self.cursor);
}
while (self.cursor < self.buffer.items.len and
std.ascii.isWhitespace(self.buffer.items[self.cursor]))
@@ -455,6 +518,12 @@ const LineEditor = struct {
}
}
/// Returns the byte offset of the start of the character before cursor.
fn prevCharStart(self: *const LineEditor) usize {
if (self.cursor == 0) return 0;
return self.cursor - self.charLenBefore(self.cursor);
}
pub fn moveToStart(self: *LineEditor) void {
self.cursor = 0;
}
@@ -464,18 +533,66 @@ const LineEditor = struct {
}
pub fn swap(self: *LineEditor) void {
// Swap operates on the two UTF-8 characters around the cursor.
if (self.cursor > 0 and self.cursor < self.buffer.items.len) {
const temp = self.buffer.items[self.cursor - 1];
self.buffer.items[self.cursor - 1] = self.buffer.items[self.cursor];
self.buffer.items[self.cursor] = temp;
self.cursor += 1;
} else if (self.cursor > 1 and self.cursor == self.buffer.items.len) {
const temp = self.buffer.items[self.cursor - 2];
self.buffer.items[self.cursor - 2] = self.buffer.items[self.cursor - 1];
self.buffer.items[self.cursor - 1] = temp;
const left_len = self.charLenBefore(self.cursor);
const right_len = self.charLenAt(self.cursor);
const left_start = self.cursor - left_len;
const right_end = self.cursor + right_len;
if (right_end <= self.buffer.items.len) {
// Copy left char to temp
var tmp: [4]u8 = undefined;
@memcpy(tmp[0..left_len], self.buffer.items[left_start..self.cursor]);
// Shift right char into left position
std.mem.copyForwards(u8, self.buffer.items[left_start..], self.buffer.items[self.cursor..right_end]);
// Copy temp (left char) after right char
@memcpy(self.buffer.items[left_start + right_len ..][0..left_len], tmp[0..left_len]);
self.cursor = right_end;
}
} else if (self.cursor > 0 and self.cursor == self.buffer.items.len) {
// At end of line: swap the two characters before cursor
const right_len = self.charLenBefore(self.cursor);
const right_start = self.cursor - right_len;
if (right_start > 0) {
const left_len = self.charLenBefore(right_start);
const left_start = right_start - left_len;
var tmp: [4]u8 = undefined;
@memcpy(tmp[0..left_len], self.buffer.items[left_start..right_start]);
std.mem.copyForwards(u8, self.buffer.items[left_start..], self.buffer.items[right_start..self.cursor]);
@memcpy(self.buffer.items[left_start + right_len ..][0..left_len], tmp[0..left_len]);
}
}
}
/// Calculate display width of buffer content up to the given byte position.
pub fn displayWidth(self: *const LineEditor, end_pos: usize) usize {
var width: usize = 0;
var pos: usize = 0;
const buf = self.buffer.items;
const limit = @min(end_pos, buf.len);
while (pos < limit) {
const byte_len = strings.codepointSize(u8, buf[pos]);
if (byte_len == 0 or pos + byte_len > buf.len) {
// Invalid UTF-8 or truncated: treat as 1-wide
width += 1;
pos += 1;
} else if (byte_len == 1) {
width += 1;
pos += 1;
} else {
// Pad to 4 bytes for decodeWTF8RuneT
var tmp: [4]u8 = .{ 0, 0, 0, 0 };
for (0..@as(usize, byte_len)) |i| {
tmp[i] = buf[pos + i];
}
const cp = strings.decodeWTF8RuneT(&tmp, byte_len, u32, 0xFFFD);
width += @as(usize, strings.visibleCodepointWidth(cp, false));
pos += @as(usize, byte_len);
}
}
return width;
}
pub fn getLine(self: *const LineEditor) []const u8 {
return self.buffer.items;
}
@@ -922,6 +1039,20 @@ fn readKey(self: *Repl) ?Key {
return .escape;
}
// Handle UTF-8 multi-byte sequences
const seq_len = strings.codepointSize(u8, byte);
if (seq_len >= 2 and seq_len <= 4) {
const len: u3 = @intCast(seq_len);
var mb = Key.Multibyte{ .bytes = .{ byte, 0, 0, 0 }, .len = len };
for (1..seq_len) |i| {
const cont = self.readByte() orelse return .unknown;
// Validate continuation byte (must be 10xxxxxx)
if (cont & 0xC0 != 0x80) return .unknown;
mb.bytes[i] = cont;
}
return .{ .multibyte = mb };
}
return Key.fromByte(byte);
}
@@ -974,8 +1105,9 @@ fn refreshLine(self: *Repl) void {
self.write(line);
}
// Position cursor
const cursor_pos = prompt_len + self.line_editor.cursor;
// Position cursor using display width (not byte count)
const cursor_display_width = self.line_editor.displayWidth(self.line_editor.cursor);
const cursor_pos = prompt_len + cursor_display_width;
if (cursor_pos < self.terminal_width) {
self.write("\r");
if (cursor_pos > 0) {
@@ -1781,6 +1913,10 @@ pub fn runWithVM(self: *Repl, vm: ?*jsc.VirtualMachine) !void {
self.line_editor.insert(c) catch {};
self.refreshLine();
},
.multibyte => |mb| {
self.line_editor.insertSlice(mb.bytes[0..mb.len]) catch {};
self.refreshLine();
},
else => {},
}
}

View File

@@ -1103,10 +1103,7 @@ pub const Transpiler = struct {
var opts = js_parser.Parser.Options.init(jsx, loader);
opts.features.emit_decorator_metadata = this_parse.emit_decorator_metadata;
// emitDecoratorMetadata implies legacy/experimental decorators, as it only
// makes sense with TypeScript's legacy decorator system (reflect-metadata).
// TC39 standard decorators have their own metadata mechanism.
opts.features.standard_decorators = !loader.isTypeScript() or !(this_parse.experimental_decorators or this_parse.emit_decorator_metadata);
opts.features.standard_decorators = !loader.isTypeScript() or !this_parse.experimental_decorators;
opts.features.allow_runtime = transpiler.options.allow_runtime;
opts.features.set_breakpoint_on_first_line = this_parse.set_breakpoint_on_first_line;
opts.features.trim_unused_imports = transpiler.options.trim_unused_imports orelse loader.isTypeScript();

View File

@@ -976,10 +976,7 @@ pub const FormData = struct {
}
pub const Field = struct {
/// Raw slice into the input buffer. Not using `bun.Semver.String` because
/// file bodies are binary data that can contain null bytes, which
/// Semver.String's inline storage treats as terminators.
value: []const u8 = "",
value: bun.Semver.String = .{},
filename: bun.Semver.String = .{},
content_type: bun.Semver.String = .{},
is_file: bool = false,
@@ -1091,7 +1088,7 @@ pub const FormData = struct {
form: *jsc.DOMFormData,
pub fn onEntry(wrap: *@This(), name: bun.Semver.String, field: Field, buf: []const u8) void {
const value_str = field.value;
const value_str = field.value.slice(buf);
var key = jsc.ZigString.initUTF8(name.slice(buf));
if (field.is_file) {
@@ -1281,7 +1278,7 @@ pub const FormData = struct {
if (strings.endsWithComptime(body, "\r\n")) {
body = body[0 .. body.len - 2];
}
field.value = body;
field.value = subslicer.sub(body).value();
field.filename = filename orelse .{};
field.is_file = is_file;

View File

@@ -561,152 +561,6 @@ describe("SPILL.TERM - invalid chunk terminators", () => {
});
});
describe("chunked encoding size hardening", () => {
test("rejects extremely large chunk size hex values", async () => {
// Chunk sizes with many hex digits should be rejected by the overflow check.
// 'FFFFFFFFFFFFFFFF' sets bits in the overflow-detection region (bits 56-59),
// so the parser must return an error.
let bodyReadSucceeded = false;
await using server = Bun.serve({
port: 0,
async fetch(req) {
try {
await req.text();
bodyReadSucceeded = true;
} catch {
// Expected to fail
}
return new Response("OK");
},
});
const client = net.connect(server.port, "127.0.0.1");
// 16 hex digits all 'F' — sets overflow bits and must be rejected
const maliciousRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"FFFFFFFFFFFFFFFF\r\n" +
"data\r\n" +
"0\r\n" +
"\r\n";
await new Promise<void>(resolve => {
let responseData = "";
client.on("error", () => resolve());
client.on("data", data => {
responseData += data.toString();
});
client.on("close", () => {
expect(responseData).toContain("HTTP/1.1 400");
expect(bodyReadSucceeded).toBe(false);
resolve();
});
client.write(maliciousRequest);
});
});
test("large chunk size exceeding 32 bits does not produce empty body", async () => {
// '100000000' hex = 2^32 (4294967296). If the chunk size were truncated
// to 32 bits, this would become 0, and the +2 for CRLF would make it
// look like the end-of-chunks marker (size=2), producing an empty body.
// With correct 64-bit handling, the parser treats this as a large
// pending chunk — the body read should fail when we close the connection,
// because the server is still expecting ~4GB of data.
let receivedBody: string | null = null;
let bodyError = false;
const { promise: headersReceived, resolve: onHeadersReceived } = Promise.withResolvers<void>();
const { promise: bodyHandled, resolve: bodyDone } = Promise.withResolvers<void>();
await using server = Bun.serve({
port: 0,
async fetch(req) {
// Signal that headers have been parsed and the fetch handler entered
onHeadersReceived();
try {
receivedBody = await req.text();
} catch {
bodyError = true;
}
bodyDone();
return new Response("OK");
},
});
const client = net.connect(server.port, "127.0.0.1");
// Send the chunk header claiming 4GB of data, followed by a few bytes,
// then close the connection.
const maliciousRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"100000000\r\n" +
"AAAA\r\n";
client.write(maliciousRequest);
// Wait until the server has parsed headers and entered the fetch handler,
// then close the connection to trigger the body error (since we won't send 4GB).
await headersReceived;
client.end();
await bodyHandled;
// With correct 64-bit handling, the body read must fail because we
// disconnected before sending 4GB of chunk data.
// With truncation to 32-bit zero, the body would be "" with no error.
expect(bodyError).toBe(true);
expect(receivedBody).toBeNull();
});
test("accepts valid chunk sizes within normal range", async () => {
// Normal-sized chunks should still work fine
let receivedBody = "";
await using server = Bun.serve({
port: 0,
async fetch(req) {
receivedBody = await req.text();
return new Response("Success");
},
});
const client = net.connect(server.port, "127.0.0.1");
// Use hex chunk sizes that are perfectly valid
const validRequest =
"POST / HTTP/1.1\r\n" +
"Host: localhost\r\n" +
"Transfer-Encoding: chunked\r\n" +
"\r\n" +
"a\r\n" + // 10 bytes
"0123456789\r\n" +
"FF\r\n" + // 255 bytes
Buffer.alloc(255, "A").toString() +
"\r\n" +
"0\r\n" +
"\r\n";
await new Promise<void>((resolve, reject) => {
client.on("error", reject);
client.on("data", data => {
const response = data.toString();
expect(response).toContain("HTTP/1.1 200");
expect(receivedBody).toBe("0123456789" + Buffer.alloc(255, "A").toString());
client.end();
resolve();
});
client.write(validRequest);
});
});
});
// Tests for strict RFC 7230 HEXDIG validation in chunk size parsing.
// Chunk sizes must only contain characters from the set [0-9a-fA-F].
// Non-HEXDIG characters must be rejected to ensure consistent parsing

View File

@@ -1,120 +0,0 @@
import { expect, test } from "bun:test";
// https://github.com/oven-sh/bun/issues/27478
// Request.formData() truncates small binary files at first null byte
test("multipart formdata preserves null bytes in small binary files", async () => {
const boundary = "----bun-null-byte-boundary";
const source = Buffer.from([0x1f, 0x8b, 0x08, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="test.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual(Array.from(source));
expect(parsed.byteLength).toBe(source.byteLength);
});
test("multipart formdata preserves files that are all null bytes", async () => {
const boundary = "----bun-test-boundary";
const source = Buffer.from([0x00, 0x00, 0x00, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="zeros.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x00, 0x00, 0x00, 0x00]);
expect(parsed.byteLength).toBe(4);
});
test("multipart formdata preserves single null byte file", async () => {
const boundary = "----bun-test-boundary";
const source = Buffer.from([0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="null.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x00]);
expect(parsed.byteLength).toBe(1);
});
test("multipart formdata preserves 8-byte binary with embedded nulls", async () => {
const boundary = "----bun-test-boundary";
// Exactly 8 bytes (max inline length of Semver.String) with nulls interspersed
const source = Buffer.from([0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00]);
const payload = Buffer.concat([
Buffer.from(
`--${boundary}\r\n` +
`Content-Disposition: form-data; name="file"; filename="mixed.bin"\r\n` +
`Content-Type: application/octet-stream\r\n\r\n`,
"utf8",
),
source,
Buffer.from(`\r\n--${boundary}--\r\n`, "utf8"),
]);
const request = new Request("http://localhost/", {
method: "POST",
headers: { "content-type": `multipart/form-data; boundary=${boundary}` },
body: payload,
});
const form = await request.formData();
const file = form.get("file");
expect(file).toBeInstanceOf(File);
const parsed = new Uint8Array(await (file as File).arrayBuffer());
expect(Array.from(parsed)).toEqual([0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, 0x00]);
expect(parsed.byteLength).toBe(8);
});

View File

@@ -1,92 +0,0 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
// When emitDecoratorMetadata is true in tsconfig but experimentalDecorators is
// absent, Bun should use legacy decorator semantics (not TC39 standard).
// emitDecoratorMetadata only makes sense with legacy decorators.
test("legacy decorators work when emitDecoratorMetadata is true without experimentalDecorators", async () => {
using dir = tempDir("issue-27526", {
"tsconfig.json": JSON.stringify({
compilerOptions: {
target: "ES2021",
module: "commonjs",
strict: true,
esModuleInterop: true,
emitDecoratorMetadata: true,
},
}),
"index.ts": `
function MyDecorator(target: any, key: string, descriptor: PropertyDescriptor) {
const original = descriptor.value;
descriptor.value = function(...args: any[]) {
return "decorated:" + original.apply(this, args);
};
}
class Foo {
@MyDecorator
hello() {
return "world";
}
}
console.log(new Foo().hello());
`,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout.trim()).toBe("decorated:world");
expect(exitCode).toBe(0);
});
// When neither emitDecoratorMetadata nor experimentalDecorators is set,
// TypeScript files should use TC39 standard decorators.
test("TC39 standard decorators work when neither emitDecoratorMetadata nor experimentalDecorators is set", async () => {
using dir = tempDir("issue-27526-standard", {
"tsconfig.json": JSON.stringify({
compilerOptions: {
target: "ES2021",
module: "commonjs",
strict: true,
},
}),
"index.ts": `
function MyDecorator(value: Function, context: ClassMethodDecoratorContext) {
return function(this: any, ...args: any[]) {
return "decorated:" + (value as any).apply(this, args);
};
}
class Foo {
@MyDecorator
hello() {
return "world";
}
}
console.log(new Foo().hello());
`,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout.trim()).toBe("decorated:world");
expect(exitCode).toBe(0);
});

View File

@@ -0,0 +1,77 @@
import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe } from "harness";
/**
 * Spawn a `bun repl` subprocess, feed it `input` on stdin (an array is
 * joined with newlines and terminated with a trailing newline), and collect
 * its output.
 *
 * stdout/stderr are drained concurrently with waiting for exit: reading the
 * pipes only after `proc.exited` resolves can deadlock when the child writes
 * more than the OS pipe buffer before exiting. This also matches the
 * Promise.all pattern used by the other spawn-based tests in this change.
 *
 * TERM=dumb and NO_COLOR=1 keep the REPL output free of color/cursor escape
 * sequences so assertions can match plain text.
 */
async function runRepl(input: string | string[]): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const inputStr = Array.isArray(input) ? input.join("\n") + "\n" : input;
  await using proc = Bun.spawn({
    cmd: [bunExe(), "repl"],
    stdin: Buffer.from(inputStr),
    stdout: "pipe",
    stderr: "pipe",
    env: {
      ...bunEnv,
      TERM: "dumb",
      NO_COLOR: "1",
    },
  });
  // Drain output while awaiting exit to avoid a pipe-buffer deadlock.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  return { stdout, stderr, exitCode };
}
const stripAnsi = Bun.stripANSI;
describe("REPL Unicode support (#27556)", () => {
test("evaluates Chinese characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("你好世界")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("你好世界");
expect(exitCode).toBe(0);
});
test("evaluates Japanese characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("こんにちは")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("こんにちは");
expect(exitCode).toBe(0);
});
test("evaluates Korean characters in strings", async () => {
const { stdout, exitCode } = await runRepl(['console.log("안녕하세요")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("안녕하세요");
expect(exitCode).toBe(0);
});
test("evaluates accented Latin characters", async () => {
const { stdout, exitCode } = await runRepl(['console.log("café résumé")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("café résumé");
expect(exitCode).toBe(0);
});
test("evaluates emoji characters", async () => {
const { stdout, exitCode } = await runRepl(['console.log("🎉🚀")', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("🎉🚀");
expect(exitCode).toBe(0);
});
test("Unicode string concatenation works", async () => {
const { stdout, exitCode } = await runRepl(['"你好" + " " + "世界"', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("你好 世界");
expect(exitCode).toBe(0);
});
test("Unicode string length is correct", async () => {
const { stdout, exitCode } = await runRepl(['"__LEN__" + "你好".length', ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("__LEN__2");
expect(exitCode).toBe(0);
});
});