Compare commits

...

3 Commits

Author SHA1 Message Date
Claude Bot
7bd58749fc fix: fallback to application/octet-stream when sanitized content-type is empty
When a blob's content-type consists entirely of CR/LF characters,
sanitizeContentType strips them all and returns an empty string. This
resulted in emitting "Content-Type: " with no value. Now falls back to
"application/octet-stream" when the sanitized result is empty.

Also strengthened content-type sanitization tests to actually use blobs
with CR/LF in their type, exercising the real sanitization path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 09:39:54 +00:00
Claude Bot
8c20a1f593 fix: free original slice when escape path is taken in multipart encoding
When escapeFormDataNameOrFilename returns a new escaped buffer, the
original name_slice/filename_slice from toSlice() was never freed,
leaking memory when the string required both UTF-16→UTF-8 conversion
and escape processing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 09:22:24 +00:00
Claude Bot
21cf8ca5de harden multipart/form-data encoding per WHATWG spec
Percent-encode double quotes ("), CR (0x0D), and LF (0x0A) in name and
filename values within Content-Disposition headers during multipart
form-data serialization, as required by the WHATWG multipart/form-data
encoding algorithm.

Also sanitize blob content_type by stripping CR/LF characters before
emitting Content-Type headers in multipart bodies.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-27 05:58:47 +00:00
2 changed files with 350 additions and 4 deletions

View File

@@ -201,6 +201,81 @@ const FormDataContext = struct {
failed: bool = false,
globalThis: *jsc.JSGlobalObject,
/// Per the WHATWG multipart/form-data spec, name and filename values in
/// Content-Disposition headers must have 0x0A (LF), 0x0D (CR), and 0x22 (")
/// percent-encoded to prevent malformed multipart bodies.
fn escapeFormDataNameOrFilename(input: []const u8, allocator: std.mem.Allocator) ?[]const u8 {
// Fast path: check if any escaping is needed.
const needs_escape = brk: {
for (input) |c| {
if (c == '"' or c == '\r' or c == '\n') break :brk true;
}
break :brk false;
};
if (!needs_escape) return null;
// Count output size: each special char expands from 1 byte to 3 bytes (%XX).
var extra: usize = 0;
for (input) |c| {
if (c == '"' or c == '\r' or c == '\n') extra += 2;
}
const buf = allocator.alloc(u8, input.len + extra) catch |err| bun.handleOom(err);
var i: usize = 0;
for (input) |c| {
switch (c) {
'"' => {
buf[i] = '%';
buf[i + 1] = '2';
buf[i + 2] = '2';
i += 3;
},
'\r' => {
buf[i] = '%';
buf[i + 1] = '0';
buf[i + 2] = 'D';
i += 3;
},
'\n' => {
buf[i] = '%';
buf[i + 1] = '0';
buf[i + 2] = 'A';
i += 3;
},
else => {
buf[i] = c;
i += 1;
},
}
}
return buf[0..i];
}
/// Sanitize content_type for use in multipart headers by stripping
/// CR and LF characters that could break the header structure.
fn sanitizeContentType(content_type: []const u8, allocator: std.mem.Allocator) ?[]const u8 {
const needs_sanitize = brk: {
for (content_type) |c| {
if (c == '\r' or c == '\n') break :brk true;
}
break :brk false;
};
if (!needs_sanitize) return null;
var count: usize = 0;
for (content_type) |c| {
if (c != '\r' and c != '\n') count += 1;
}
const buf = allocator.alloc(u8, count) catch |err| bun.handleOom(err);
var i: usize = 0;
for (content_type) |c| {
if (c != '\r' and c != '\n') {
buf[i] = c;
i += 1;
}
}
return buf[0..i];
}
pub fn onEntry(this: *FormDataContext, name: ZigString, entry: jsc.DOMFormData.FormDataEntry) void {
if (this.failed) return;
var globalThis = this.globalThis;
@@ -215,7 +290,12 @@ const FormDataContext = struct {
joiner.pushStatic("Content-Disposition: form-data; name=\"");
const name_slice = name.toSlice(allocator);
joiner.push(name_slice.slice(), name_slice.allocator.get());
if (escapeFormDataNameOrFilename(name_slice.slice(), allocator)) |escaped| {
name_slice.deinit();
joiner.push(escaped, allocator);
} else {
joiner.push(name_slice.slice(), name_slice.allocator.get());
}
switch (entry) {
.string => |value| {
@@ -226,13 +306,27 @@ const FormDataContext = struct {
.file => |value| {
joiner.pushStatic("\"; filename=\"");
const filename_slice = value.filename.toSlice(allocator);
joiner.push(filename_slice.slice(), filename_slice.allocator.get());
if (escapeFormDataNameOrFilename(filename_slice.slice(), allocator)) |escaped| {
filename_slice.deinit();
joiner.push(escaped, allocator);
} else {
joiner.push(filename_slice.slice(), filename_slice.allocator.get());
}
joiner.pushStatic("\"\r\n");
const blob = value.blob;
const content_type = if (blob.content_type.len > 0) blob.content_type else "application/octet-stream";
const raw_content_type = if (blob.content_type.len > 0) blob.content_type else "application/octet-stream";
joiner.pushStatic("Content-Type: ");
joiner.pushStatic(content_type);
if (sanitizeContentType(raw_content_type, allocator)) |sanitized| {
if (sanitized.len > 0) {
joiner.push(sanitized, allocator);
} else {
allocator.free(sanitized);
joiner.pushStatic("application/octet-stream");
}
} else {
joiner.pushStatic(raw_content_type);
}
joiner.pushStatic("\r\n\r\n");
if (blob.store) |store| {

View File

@@ -0,0 +1,252 @@
import { describe, expect, it } from "bun:test";
describe("FormData multipart encoding hardening", () => {
describe("name and filename percent-encoding per WHATWG spec", () => {
it("should percent-encode double quotes in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), 'my"file.txt');
const response = new Response(fd);
const body = await response.text();
// The double quote must be percent-encoded as %22
expect(body).toContain('filename="my%22file.txt"');
// Must NOT contain an unescaped quote that breaks out of the filename field
expect(body).not.toContain('filename="my"file.txt"');
});
it("should percent-encode CR and LF in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\r\nname.txt");
const response = new Response(fd);
const body = await response.text();
// CR and LF must be percent-encoded
expect(body).toContain('filename="file%0D%0Aname.txt"');
});
it("should percent-encode LF alone in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\nname.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="file%0Aname.txt"');
});
it("should percent-encode CR alone in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["hello"]), "file\rname.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="file%0Dname.txt"');
});
it("should percent-encode double quotes in name", async () => {
const fd = new FormData();
fd.append('na"me', "value");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="na%22me"');
});
it("should percent-encode CR and LF in name", async () => {
const fd = new FormData();
fd.append("na\r\nme", "value");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="na%0D%0Ame"');
});
it("should percent-encode multiple special chars in filename", async () => {
const fd = new FormData();
fd.append("file", new Blob(["content"]), 'a"b\rc\nd');
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="a%22b%0Dc%0Ad"');
});
it("should not alter names/filenames without special characters", async () => {
const fd = new FormData();
fd.append("file", new Blob(["content"]), "normal-file.txt");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="normal-file.txt"');
expect(body).toContain('name="file"');
});
it("should handle filename that is only special characters", async () => {
const fd = new FormData();
fd.append("file", new Blob(["x"]), '"\r\n');
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('filename="%22%0D%0A"');
});
it("should properly encode name for file entries too", async () => {
const fd = new FormData();
fd.append('up"load', new Blob(["data"]), "file.bin");
const response = new Response(fd);
const body = await response.text();
expect(body).toContain('name="up%22load"');
expect(body).toContain('filename="file.bin"');
});
});
describe("content-type sanitization", () => {
it("should strip CR/LF from blob content-type in multipart output", async () => {
const blob = new Blob(["data"], { type: "text/evil\r\nx-injected: true" });
const fd = new FormData();
fd.append("file", blob, "test.txt");
const response = new Response(fd);
const body = await response.text();
// CR/LF should be stripped from the content-type value
const match = body.match(/Content-Type: ([^\r\n]*)/);
expect(match).not.toBeNull();
const ctValue = match![1];
expect(ctValue).not.toContain("\r");
expect(ctValue).not.toContain("\n");
expect(ctValue).toBe("text/evilx-injected: true");
});
it("should fallback to application/octet-stream when content-type is all CR/LF", async () => {
const blob = new Blob(["data"], { type: "\r\n" });
const fd = new FormData();
fd.append("file", blob, "test.bin");
const response = new Response(fd);
const body = await response.text();
// All-CR/LF content-type should fallback to application/octet-stream
const match = body.match(/Content-Type: ([^\r\n]*)/);
expect(match).not.toBeNull();
expect(match![1]).toBe("application/octet-stream");
});
it("should not contain bare CR or LF in content-type header line", async () => {
// Use a content-type with embedded CR/LF to exercise sanitization
const fd = new FormData();
fd.append("file", new Blob(["data"], { type: "text/plain\r\nX-Bad: header" }), "test.bin");
const response = new Response(fd);
const body = await response.text();
// Extract the Content-Type value using regex against the raw body
const match = body.match(/Content-Type: ([^\r\n]*)/);
expect(match).not.toBeNull();
const ctValue = match![1];
expect(ctValue).not.toContain("\r");
expect(ctValue).not.toContain("\n");
});
});
describe("roundtrip with special characters", () => {
it("should roundtrip FormData with quotes in filename", async () => {
const fd = new FormData();
const content = "file content here";
fd.append("upload", new Blob([content]), 'my"file.txt');
// Serialize to multipart
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
// Parse back
const parsed = await new Response(await response.blob(), {
headers: { "Content-Type": contentType },
}).formData();
// Verify the file content survived
const file = parsed.get("upload") as File;
expect(file).toBeInstanceOf(File);
expect(await file.text()).toBe(content);
});
it("should roundtrip FormData with CRLF in name", async () => {
const fd = new FormData();
fd.append("field\r\nname", "value123");
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
const parsed = await new Response(await response.blob(), {
headers: { "Content-Type": contentType },
}).formData();
// The value should be retrievable (name may be decoded differently
// depending on parser, but the structure should not be corrupted)
const entries = Array.from(parsed.entries());
expect(entries.length).toBe(1);
expect(entries[0][1]).toBe("value123");
});
it("should not allow filename to inject additional form fields", async () => {
// This is the key test: a crafted filename should not be able to
// inject extra multipart fields into the serialized body.
const fd = new FormData();
const maliciousFilename =
'safe.png"\r\nContent-Type: text/html\r\n\r\n<script>alert(1)</script>\r\n--boundary\r\nContent-Disposition: form-data; name="injected"\r\n\r\nevil';
fd.append("file", new Blob(["real content"]), maliciousFilename);
const response = new Response(fd);
const contentType = response.headers.get("Content-Type")!;
const body = await response.text();
// The double quotes and CRLF in the filename must be percent-encoded
// so they can't break out of the Content-Disposition header value.
// Verify no raw CRLF appears between the filename quotes.
const filenameMatch = body.match(/filename="([^"]*)"/);
expect(filenameMatch).not.toBeNull();
const filenameValue = filenameMatch![1];
// The encoded filename must not contain raw CR or LF
expect(filenameValue).not.toContain("\r");
expect(filenameValue).not.toContain("\n");
// The quote in the original filename must be percent-encoded
expect(filenameValue).toContain("%22");
// CR and LF must be percent-encoded
expect(filenameValue).toContain("%0D");
expect(filenameValue).toContain("%0A");
// The multipart body should have exactly one boundary-delimited part
// (the crafted filename must not create additional parts)
const boundary = contentType.split("boundary=")[1];
const parts = body.split("--" + boundary).filter((p: string) => p !== "" && p !== "--\r\n");
expect(parts.length).toBe(1);
});
it("should not allow name to inject additional headers", async () => {
const fd = new FormData();
fd.append('field"\r\nEvil-Header: injected\r\n\r\nbadcontent', "legitimate value");
const response = new Response(fd);
const body = await response.text();
// The CRLF in the name should be percent-encoded, preventing it
// from being parsed as a separate header line.
// Split body into actual lines and check no line starts with "Evil-Header:"
const lines = body.split("\r\n");
const hasInjectedHeader = lines.some((line: string) => line.startsWith("Evil-Header:"));
expect(hasInjectedHeader).toBe(false);
// The value should still be present
expect(body).toContain("legitimate value");
});
});
});