Compare commits

..

1 Commits

Author SHA1 Message Date
Claude Bot
21cf8ca5de harden multipart/form-data encoding per WHATWG spec
Percent-encode double quotes ("), CR (0x0D), and LF (0x0A) in name and
filename values within Content-Disposition headers during multipart
form-data serialization, as required by the WHATWG multipart/form-data
encoding algorithm.

Also sanitize blob content_type by stripping CR/LF characters before
emitting Content-Type headers in multipart bodies.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-27 05:58:47 +00:00
7 changed files with 360 additions and 208 deletions

View File

@@ -228,16 +228,16 @@ To build for macOS x64:
The order of the components in the `--target` value does not matter, as long as they're delimited by a `-`.
| --target | Operating System | Architecture | Modern | Baseline | Libc |
| -------------------- | ---------------- | ------------ | ------ | -------- | ----- |
| bun-linux-x64 | Linux | x64 | ✅ | ✅ | glibc |
| bun-linux-arm64 | Linux | arm64 | ✅ | N/A | glibc |
| bun-windows-x64 | Windows | x64 | ✅ | ✅ | - |
| bun-windows-arm64 | Windows | arm64 | ✅ | N/A | - |
| bun-darwin-x64 | macOS | x64 | ✅ | ✅ | - |
| bun-darwin-arm64 | macOS | arm64 | ✅ | N/A | - |
| bun-linux-x64-musl | Linux | x64 | ✅ | ✅ | musl |
| bun-linux-arm64-musl | Linux | arm64 | ✅ | N/A | musl |
| --target | Operating System | Architecture | Modern | Baseline | Libc |
| --------------------- | ---------------- | ------------ | ------ | -------- | ----- |
| bun-linux-x64 | Linux | x64 | ✅ | ✅ | glibc |
| bun-linux-arm64 | Linux | arm64 | ✅ | N/A | glibc |
| bun-windows-x64 | Windows | x64 | ✅ | ✅ | - |
| bun-windows-arm64 | Windows | arm64 | ✅ | N/A | - |
| bun-darwin-x64 | macOS | x64 | ✅ | ✅ | - |
| bun-darwin-arm64 | macOS | arm64 | ✅ | N/A | - |
| bun-linux-x64-musl | Linux | x64 | ✅ | ✅ | musl |
| bun-linux-arm64-musl | Linux | arm64 | ✅ | N/A | musl |
<Warning>
On x64 platforms, Bun uses SIMD optimizations which require a modern CPU supporting AVX2 instructions. The `-baseline`

View File

@@ -74,61 +74,30 @@ pub fn ReplTransforms(comptime P: type) type {
for (all_stmts) |stmt| {
switch (stmt.data) {
.s_local => |local| {
const is_const = local.kind == .k_const or local.kind == .k_using or local.kind == .k_await_using;
// Hoist all declarations as var so they become context properties
// In sloppy mode, var at top level becomes a property of the global/context object
// This is essential for REPL variable persistence across vm.runInContext calls
const kind: S.Local.Kind = .k_var;
if (is_const) {
// For const/using declarations, preserve the original declaration inside the IIFE
// to maintain immutability semantics. After the declaration, use
// Object.defineProperty(globalThis, name, { value, writable: false, configurable: true, enumerable: true })
// to persist each binding on globalThis as a non-writable property.
// configurable: true allows re-declaration in subsequent REPL lines.
try inner_stmts.append(stmt);
// Extract individual identifiers from binding patterns for hoisting
var hoisted_decl_list = ListManaged(G.Decl).init(allocator);
for (local.decls.slice()) |decl| {
try extractIdentifiersFromBinding(p, decl.binding, &hoisted_decl_list);
}
// Add Object.defineProperty calls for each identifier in the binding
for (local.decls.slice()) |decl| {
try emitDefinePropertyCalls(p, decl.binding, &inner_stmts, allocator, stmt.loc);
}
if (hoisted_decl_list.items.len > 0) {
try hoisted_stmts.append(p.s(S.Local{
.kind = kind,
.decls = Decl.List.fromOwnedSlice(hoisted_decl_list.items),
}, stmt.loc));
}
// Add the last declarator's first identifier as a result
// expression so wrapLastExpressionWithReturn can capture it
// for display. We emit the identifier (not the initializer)
// to avoid re-evaluating side-effectful expressions.
const decls = local.decls.slice();
if (decls.len > 0) {
const last_decl = decls[decls.len - 1];
if (last_decl.value != null) {
if (getFirstIdentifierRef(last_decl.binding)) |ref| {
try inner_stmts.append(p.s(S.SExpr{
.value = p.newExpr(E.Identifier{ .ref = ref }, stmt.loc),
}, stmt.loc));
}
}
}
} else {
// For var/let declarations, hoist as var so they become global properties
// In sloppy mode, var at top level becomes a property of the global/context object
// This is essential for REPL variable persistence across vm.runInContext calls
// Extract individual identifiers from binding patterns for hoisting
var hoisted_decl_list = ListManaged(G.Decl).init(allocator);
for (local.decls.slice()) |decl| {
try extractIdentifiersFromBinding(p, decl.binding, &hoisted_decl_list);
}
if (hoisted_decl_list.items.len > 0) {
try hoisted_stmts.append(p.s(S.Local{
.kind = .k_var,
.decls = Decl.List.fromOwnedSlice(hoisted_decl_list.items),
}, stmt.loc));
}
// Create assignment expressions for the inner statements
for (local.decls.slice()) |decl| {
if (decl.value) |value| {
// Create assignment expression: binding = value
const assign_expr = createBindingAssignment(p, decl.binding, value, allocator);
try inner_stmts.append(p.s(S.SExpr{ .value = assign_expr }, stmt.loc));
}
// Create assignment expressions for the inner statements
for (local.decls.slice()) |decl| {
if (decl.value) |value| {
// Create assignment expression: binding = value
const assign_expr = createBindingAssignment(p, decl.binding, value, allocator);
try inner_stmts.append(p.s(S.SExpr{ .value = assign_expr }, stmt.loc));
}
}
},
@@ -415,63 +384,6 @@ pub fn ReplTransforms(comptime P: type) type {
}
}
/// Get the first identifier ref from a binding pattern
fn getFirstIdentifierRef(binding: Binding) ?Ref {
switch (binding.data) {
.b_identifier => |ident| return ident.ref,
.b_array => |arr| {
for (arr.items) |item| {
if (getFirstIdentifierRef(item.binding)) |ref| return ref;
}
return null;
},
.b_object => |obj| {
for (obj.properties) |prop| {
if (getFirstIdentifierRef(prop.value)) |ref| return ref;
}
return null;
},
.b_missing => return null,
}
}
/// Emit __repl_defineConst("name", name) for each identifier in a binding pattern.
/// This persists const bindings on globalThis using getter/setter so that
/// subsequent REPL evaluations cannot reassign them.
/// The __repl_defineConst helper is initialized during REPL startup (see repl.zig).
fn emitDefinePropertyCalls(p: *P, binding: Binding, inner_stmts: *ListManaged(Stmt), allocator: Allocator, loc: logger.Loc) !void {
switch (binding.data) {
.b_identifier => |ident| {
const name = p.symbols.items[ident.ref.innerIndex()].original_name;
// __repl_defineConst("name", name)
const helper_ref = try p.newSymbol(.unbound, "__repl_defineConst");
const helper = p.newExpr(E.Identifier{ .ref = helper_ref }, loc);
var args = bun.handleOom(allocator.alloc(Expr, 2));
args[0] = p.newExpr(E.String{ .data = name }, loc);
args[1] = p.newExpr(E.Identifier{ .ref = ident.ref }, loc);
const call = p.newExpr(E.Call{
.target = helper,
.args = ExprNodeList.fromOwnedSlice(args),
}, loc);
try inner_stmts.append(p.s(S.SExpr{ .value = call }, loc));
},
.b_array => |arr| {
for (arr.items) |item| {
try emitDefinePropertyCalls(p, item.binding, inner_stmts, allocator, loc);
}
},
.b_object => |obj| {
for (obj.properties) |prop| {
try emitDefinePropertyCalls(p, prop.value, inner_stmts, allocator, loc);
}
},
.b_missing => {},
}
}
/// Create { __proto__: null, value: expr } wrapper object
/// Uses null prototype to create a clean data object
fn wrapExprInValueObject(p: *P, expr: Expr, allocator: Allocator) Expr {
@@ -591,7 +503,6 @@ const Binding = js_ast.Binding;
const E = js_ast.E;
const Expr = js_ast.Expr;
const ExprNodeList = js_ast.ExprNodeList;
const Ref = js_ast.Ref;
const S = js_ast.S;
const Stmt = js_ast.Stmt;

View File

@@ -201,6 +201,81 @@ const FormDataContext = struct {
failed: bool = false,
globalThis: *jsc.JSGlobalObject,
/// Per the WHATWG multipart/form-data spec, name and filename values in
/// Content-Disposition headers must have 0x0A (LF), 0x0D (CR), and 0x22 (")
/// percent-encoded to prevent malformed multipart bodies.
fn escapeFormDataNameOrFilename(input: []const u8, allocator: std.mem.Allocator) ?[]const u8 {
// Fast path: check if any escaping is needed.
const needs_escape = brk: {
for (input) |c| {
if (c == '"' or c == '\r' or c == '\n') break :brk true;
}
break :brk false;
};
if (!needs_escape) return null;
// Count output size: each special char expands from 1 byte to 3 bytes (%XX).
var extra: usize = 0;
for (input) |c| {
if (c == '"' or c == '\r' or c == '\n') extra += 2;
}
const buf = allocator.alloc(u8, input.len + extra) catch |err| bun.handleOom(err);
var i: usize = 0;
for (input) |c| {
switch (c) {
'"' => {
buf[i] = '%';
buf[i + 1] = '2';
buf[i + 2] = '2';
i += 3;
},
'\r' => {
buf[i] = '%';
buf[i + 1] = '0';
buf[i + 2] = 'D';
i += 3;
},
'\n' => {
buf[i] = '%';
buf[i + 1] = '0';
buf[i + 2] = 'A';
i += 3;
},
else => {
buf[i] = c;
i += 1;
},
}
}
return buf[0..i];
}
/// Sanitize content_type for use in multipart headers by stripping
/// CR and LF characters that could break the header structure.
fn sanitizeContentType(content_type: []const u8, allocator: std.mem.Allocator) ?[]const u8 {
const needs_sanitize = brk: {
for (content_type) |c| {
if (c == '\r' or c == '\n') break :brk true;
}
break :brk false;
};
if (!needs_sanitize) return null;
var count: usize = 0;
for (content_type) |c| {
if (c != '\r' and c != '\n') count += 1;
}
const buf = allocator.alloc(u8, count) catch |err| bun.handleOom(err);
var i: usize = 0;
for (content_type) |c| {
if (c != '\r' and c != '\n') {
buf[i] = c;
i += 1;
}
}
return buf[0..i];
}
pub fn onEntry(this: *FormDataContext, name: ZigString, entry: jsc.DOMFormData.FormDataEntry) void {
if (this.failed) return;
var globalThis = this.globalThis;
@@ -215,7 +290,11 @@ const FormDataContext = struct {
joiner.pushStatic("Content-Disposition: form-data; name=\"");
const name_slice = name.toSlice(allocator);
joiner.push(name_slice.slice(), name_slice.allocator.get());
if (escapeFormDataNameOrFilename(name_slice.slice(), allocator)) |escaped| {
joiner.push(escaped, allocator);
} else {
joiner.push(name_slice.slice(), name_slice.allocator.get());
}
switch (entry) {
.string => |value| {
@@ -226,13 +305,22 @@ const FormDataContext = struct {
.file => |value| {
joiner.pushStatic("\"; filename=\"");
const filename_slice = value.filename.toSlice(allocator);
joiner.push(filename_slice.slice(), filename_slice.allocator.get());
if (escapeFormDataNameOrFilename(filename_slice.slice(), allocator)) |escaped| {
joiner.push(escaped, allocator);
} else {
joiner.push(filename_slice.slice(), filename_slice.allocator.get());
}
joiner.pushStatic("\"\r\n");
const blob = value.blob;
const content_type = if (blob.content_type.len > 0) blob.content_type else "application/octet-stream";
const raw_content_type = if (blob.content_type.len > 0) blob.content_type else "application/octet-stream";
const content_type = sanitizeContentType(raw_content_type, allocator) orelse raw_content_type;
joiner.pushStatic("Content-Type: ");
joiner.pushStatic(content_type);
if (content_type.ptr != raw_content_type.ptr) {
joiner.push(content_type, allocator);
} else {
joiner.pushStatic(content_type);
}
joiner.pushStatic("\r\n\r\n");
if (blob.store) |store| {

View File

@@ -176,9 +176,6 @@ const ReplRunner = struct {
}
vm.transpiler.env.loadTracy();
// Set up the const variable helper for REPL persistence
this.repl.initConstHelper();
}
};

View File

@@ -708,38 +708,6 @@ pub fn init(allocator: Allocator) Repl {
};
}
/// Initialize the REPL helper for const variable protection.
/// This defines a helper function on globalThis that uses getter/setter
/// to make const variables non-reassignable across REPL evaluations.
/// Must be called after the VM and global object are fully initialized.
pub fn initConstHelper(self: *Repl) void {
const global = self.global orelse return;
const helper_code =
\\Object.defineProperty(globalThis, "__repl_defineConst", {
\\ value: function(name, value) {
\\ Object.defineProperty(globalThis, name, {
\\ get: function() { return value; },
\\ set: function() { throw new TypeError("Assignment to constant variable."); },
\\ configurable: true,
\\ enumerable: true
\\ });
\\ },
\\ writable: false,
\\ configurable: false,
\\ enumerable: false
\\});
;
var exception: jsc.JSValue = .js_undefined;
_ = Bun__REPL__evaluate(
global,
helper_code.ptr,
helper_code.len,
"[repl-init]".ptr,
"[repl-init]".len,
&exception,
);
}
pub fn deinit(self: *Repl) void {
self.restoreTerminal();
self.history.save();

View File

@@ -619,55 +619,6 @@ describe.concurrent("Bun REPL", () => {
expect(exitCode).toBe(0);
});
test("const cannot be reassigned across lines (#27485)", async () => {
const { stdout, stderr, exitCode } = await runRepl(["const a = 1", "a = 2", ".exit"]);
const output = stripAnsi(stdout + stderr);
expect(output).toMatch(/TypeError.*Assignment to constant variable/i);
expect(exitCode).toBe(0);
});
test("const destructured variables cannot be reassigned (#27485)", async () => {
const { stdout, stderr, exitCode } = await runRepl(["const [x, y] = [10, 20]", "x = 99", ".exit"]);
const output = stripAnsi(stdout + stderr);
expect(output).toMatch(/TypeError.*Assignment to constant variable/i);
expect(exitCode).toBe(0);
});
test("const object destructured variables cannot be reassigned (#27485)", async () => {
const { stdout, stderr, exitCode } = await runRepl(["const { a, b } = { a: 1, b: 2 }", "a = 99", ".exit"]);
const output = stripAnsi(stdout + stderr);
expect(output).toMatch(/TypeError.*Assignment to constant variable/i);
expect(exitCode).toBe(0);
});
test("const value is preserved after failed reassignment (#27485)", async () => {
const { stdout, stderr, exitCode } = await runRepl(["const a = 42", "try { a = 0 } catch(e) {}", "a", ".exit"]);
const output = stripAnsi(stdout + stderr);
// The last "a" evaluation should still return 42
// Split output lines and check the last result line before .exit
const lines = output
.split("\n")
.map((l: string) => l.trim())
.filter((l: string) => l === "42");
// Should find 42 at least twice: once from declaration, once from final read
expect(lines.length).toBeGreaterThanOrEqual(2);
expect(exitCode).toBe(0);
});
test("let can still be reassigned across lines", async () => {
const { stdout, exitCode } = await runRepl(["let v = 1", "v = 2", "v", ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("2");
expect(exitCode).toBe(0);
});
test("var can still be reassigned across lines", async () => {
const { stdout, exitCode } = await runRepl(["var v = 1", "v = 2", "v", ".exit"]);
const output = stripAnsi(stdout);
expect(output).toContain("2");
expect(exitCode).toBe(0);
});
test("array destructuring persists", async () => {
const { stdout, exitCode } = await runRepl(["const [a, b, c] = [10, 20, 30]", "a + b + c", ".exit"]);
expect(stripAnsi(stdout)).toContain("60");

View File

@@ -0,0 +1,237 @@
import { describe, expect, it } from "bun:test";
describe("FormData multipart encoding hardening", () => {
describe("name and filename percent-encoding per WHATWG spec", () => {
it("should percent-encode double quotes in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), 'my"file.txt');
const response = new Response(fd);
const body = await response.text();
// The double quote must be percent-encoded as %22
expect(body).toContain('filename="my%22file.txt"');
// Must NOT contain an unescaped quote that breaks out of the filename field
expect(body).not.toContain('filename="my"file.txt"');
});
it("should percent-encode CR and LF in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\r\nname.txt");
const response = new Response(fd);
const body = await response.text();
// CR and LF must be percent-encoded
expect(body).toContain('filename="file%0D%0Aname.txt"');
});
it("should percent-encode LF alone in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\nname.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="file%0Aname.txt"');
});
it("should percent-encode CR alone in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\rname.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="file%0Dname.txt"');
});
it("should percent-encode double quotes in name", async () => {
const fd = new FormData();
fd.append('na"me', "value");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="na%22me"');
});
it("should percent-encode CR and LF in name", async () => {
const fd = new FormData();
fd.append("na\r\nme", "value");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="na%0D%0Ame"');
});
it("should percent-encode multiple special chars in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["content"]), 'a"b\rc\nd');
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="a%22b%0Dc%0Ad"');
});
it("should not alter names/filenames without special characters", async () => {
const fd = new FormData();
fd.append("file", new Blob(["content"]), "normal-file.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="normal-file.txt"');
expect(body).toContain('name="file"');
});
it("should handle filename that is only special characters", async () => {
const fd = new FormData();
fd.append("file", new Blob(["x"]), '"\r\n');
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="%22%0D%0A"');
});
it("should properly encode name for file entries too", async () => {
const fd = new FormData();
fd.append('up"load', new Blob(["data"]), "file.bin");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="up%22load"');
expect(body).toContain('filename="file.bin"');
});
});
describe("content-type sanitization", () => {
it("should strip CR/LF from blob content-type in multipart output", async () => {
const blob = new Blob(["data"], { type: "text/plain" });
const fd = new FormData();
fd.append("file", blob, "test.txt");
const response = new Response(fd);
const body = await response.text();
// Normal content-type should pass through fine
expect(body).toContain("Content-Type: text/plain");
});
it("should not contain bare CR or LF in content-type header line", async () => {
// Blob constructor lowercases and validates ASCII, but we verify the
// multipart output does not contain unexpected line breaks in the
// Content-Type header region.
const fd = new FormData();
fd.append("file", new Blob(["data"], { type: "application/octet-stream" }), "test.bin");
const response = new Response(fd);
const body = await response.text();
// Extract the Content-Type line from the multipart body
const lines = body.split("\r\n");
const ctLine = lines.find((l: string) => l.startsWith("Content-Type: "));
expect(ctLine).toBeDefined();
// The Content-Type value should not contain any CR or LF
const ctValue = ctLine!.slice("Content-Type: ".length);
expect(ctValue).not.toContain("\r");
expect(ctValue).not.toContain("\n");
});
});
describe("roundtrip with special characters", () => {
it("should roundtrip FormData with quotes in filename", async () => {
const fd = new FormData();
const content = "file content here";
fd.append("upload", new Blob([content]), 'my"file.txt');
// Serialize to multipart
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
// Parse back
const parsed = await new Response(await response.blob(), {
headers: { "Content-Type": contentType },
}).formData();
// Verify the file content survived
const file = parsed.get("upload") as File;
expect(file).toBeInstanceOf(File);
expect(await file.text()).toBe(content);
});
it("should roundtrip FormData with CRLF in name", async () => {
const fd = new FormData();
fd.append("field\r\nname", "value123");
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
const parsed = await new Response(await response.blob(), {
headers: { "Content-Type": contentType },
}).formData();
// The value should be retrievable (name may be decoded differently
// depending on parser, but the structure should not be corrupted)
const entries = Array.from(parsed.entries());
expect(entries.length).toBe(1);
expect(entries[0][1]).toBe("value123");
});
it("should not allow filename to inject additional form fields", async () => {
// This is the key test: a crafted filename should not be able to
// inject extra multipart fields into the serialized body.
const fd = new FormData();
const maliciousFilename =
'safe.png"\r\nContent-Type: text/html\r\n\r\n<script>alert(1)</script>\r\n--boundary\r\nContent-Disposition: form-data; name="injected"\r\n\r\nevil';
fd.append("file", new Blob(["real content"]), maliciousFilename);
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
const body = await response.text();
// The double quotes and CRLF in the filename must be percent-encoded
// so they can't break out of the Content-Disposition header value.
// Verify no raw CRLF appears between the filename quotes.
const filenameMatch = body.match(/filename="([^"]*)"/);
expect(filenameMatch).not.toBeNull();
const filenameValue = filenameMatch![1];
// The encoded filename must not contain raw CR or LF
expect(filenameValue).not.toContain("\r");
expect(filenameValue).not.toContain("\n");
// The quote in the original filename must be percent-encoded
expect(filenameValue).toContain("%22");
// CR and LF must be percent-encoded
expect(filenameValue).toContain("%0D");
expect(filenameValue).toContain("%0A");
// The multipart body should have exactly one boundary-delimited part
// (the crafted filename must not create additional parts)
const boundary = contentType.split("boundary=")[1];
const parts = body.split("--" + boundary).filter((p: string) => p !== "" && p !== "--\r\n");
expect(parts.length).toBe(1);
});
it("should not allow name to inject additional headers", async () => {
const fd = new FormData();
fd.append('field"\r\nEvil-Header: injected\r\n\r\nbadcontent', "legitimate value");
const response = new Response(fd);
const body = await response.text();
// The CRLF in the name should be percent-encoded, preventing it
// from being parsed as a separate header line.
// Split body into actual lines and check no line starts with "Evil-Header:"
const lines = body.split("\r\n");
const hasInjectedHeader = lines.some((line: string) => line.startsWith("Evil-Header:"));
expect(hasInjectedHeader).toBe(false);
// The value should still be present
expect(body).toContain("legitimate value");
});
});
});