mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
fix(FormData): throw error instead of assertion failure on very large input (#25006)
## Summary - Fix crash in `FormData.from()` when called with very large ArrayBuffer input - Add length check in C++ `toString` function against both Bun's synthetic limit and WebKit's `String::MaxLength` - For UTF-8 tagged strings, use simdutf to calculate actual UTF-16 length only when byte length exceeds the limit ## Root Cause When `FormData.from()` was called with a very large ArrayBuffer (e.g., `new Uint32Array(913148244)` = ~3.6GB), the code would crash with: ``` ASSERTION FAILED: data.size() <= MaxLength vendor/WebKit/Source/WTF/wtf/text/StringImpl.h(886) ``` The `toString()` function in `helpers.h` was only checking against `Bun__stringSyntheticAllocationLimit` (which defaults to ~4GB), but not against WebKit's `String::MaxLength` (INT32_MAX, ~2GB). When the input exceeded `String::MaxLength`, `createWithoutCopying()` would fail with an assertion. ## Changes 1. **helpers.h**: Added `|| str.len > WTF::String::MaxLength` checks to all three code paths in `toString()`: - UTF-8 tagged pointer path (with simdutf length calculation only when needed) - External pointer path - Non-copying creation path 2. **url.zig**: Reverted the incorrect Zig-side check (UTF-8 byte length != UTF-16 character length) ## Test plan - [x] Added test that verifies FormData.from with oversized input doesn't crash - [x] Verified original crash case now returns empty FormData instead of crashing: ```js const v3 = new Uint32Array(913148244); FormData.from(v3); // No longer crashes ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Bot <claude-bot@bun.sh> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
This commit is contained in:
@@ -5677,7 +5677,15 @@ CPP_DECL JSC::EncodedJSValue WebCore__DOMFormData__createFromURLQuery(JSC::JSGlo
|
||||
{
|
||||
Zig::GlobalObject* globalObject = static_cast<Zig::GlobalObject*>(arg0);
|
||||
// don't need to copy the string because it internally does.
|
||||
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), toString(*arg1));
|
||||
auto str = toString(*arg1);
|
||||
// toString() in helpers.h returns an empty string when the input exceeds
|
||||
// String::MaxLength or Bun's synthetic allocation limit. This is the only
|
||||
// condition under which toString() returns empty for non-empty input.
|
||||
if (str.isEmpty() && arg1->len > 0) {
|
||||
auto scope = DECLARE_THROW_SCOPE(globalObject->vm());
|
||||
return Bun::ERR::STRING_TOO_LONG(scope, globalObject);
|
||||
}
|
||||
auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), WTFMove(str));
|
||||
return JSValue::encode(toJSNewlyCreated(arg0, globalObject, WTFMove(formData)));
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
#include "root.h"
|
||||
#include "wtf/text/ASCIILiteral.h"
|
||||
#include "wtf/SIMDUTF.h"
|
||||
|
||||
#include <JavaScriptCore/Error.h>
|
||||
#include <JavaScriptCore/Exception.h>
|
||||
@@ -79,12 +80,24 @@ static const WTF::String toString(ZigString str)
|
||||
}
|
||||
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
|
||||
ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed.");
|
||||
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
|
||||
// For valid UTF-8, the number of UTF-16 code units is <= the number of UTF-8 bytes
|
||||
// (ASCII is 1:1; other code points use multiple UTF-8 bytes per UTF-16 code unit).
|
||||
// We only need to compute the actual UTF-16 length when the byte length exceeds the limit.
|
||||
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
|
||||
if (str.len > maxLength) [[unlikely]] {
|
||||
// UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length.
|
||||
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
|
||||
if (utf16Length > maxLength) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
|
||||
}
|
||||
|
||||
if (isTaggedExternalPtr(str.ptr)) [[unlikely]] {
|
||||
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
|
||||
free_global_string(nullptr, reinterpret_cast<void*>(const_cast<unsigned char*>(untag(str.ptr))), static_cast<unsigned>(str.len));
|
||||
return {};
|
||||
}
|
||||
@@ -95,7 +108,7 @@ static const WTF::String toString(ZigString str)
|
||||
}
|
||||
|
||||
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -121,11 +134,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr)
|
||||
return WTF::String();
|
||||
}
|
||||
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
|
||||
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
|
||||
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
|
||||
if (ptr.len > maxLength) [[unlikely]] {
|
||||
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
|
||||
if (utf16Length > maxLength) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
|
||||
}
|
||||
|
||||
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
|
||||
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -141,11 +162,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr)
|
||||
return WTF::String();
|
||||
}
|
||||
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
|
||||
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
|
||||
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
|
||||
if (ptr.len > maxLength) [[unlikely]] {
|
||||
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(&untag(str.ptr)[ptr.off]), ptr.len);
|
||||
if (utf16Length > maxLength) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len });
|
||||
}
|
||||
|
||||
// This will fail if the string is too long. Let's make it explicit instead of an ASSERT.
|
||||
if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] {
|
||||
if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -161,6 +190,14 @@ static const WTF::String toStringCopy(ZigString str)
|
||||
return WTF::String();
|
||||
}
|
||||
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
|
||||
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
|
||||
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
|
||||
if (str.len > maxLength) [[unlikely]] {
|
||||
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
|
||||
if (utf16Length > maxLength) {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
|
||||
}
|
||||
|
||||
@@ -188,6 +225,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder)
|
||||
return;
|
||||
}
|
||||
if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] {
|
||||
// Check if the resulting UTF-16 string could possibly exceed the maximum length.
|
||||
size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast<size_t>(WTF::String::MaxLength));
|
||||
if (str.len > maxLength) [[unlikely]] {
|
||||
size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast<const char*>(untag(str.ptr)), str.len);
|
||||
if (utf16Length > maxLength) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len });
|
||||
builder.append(converted);
|
||||
return;
|
||||
|
||||
13
src/url.zig
13
src/url.zig
@@ -984,7 +984,12 @@ pub const FormData = struct {
|
||||
switch (encoding) {
|
||||
.URLEncoded => {
|
||||
var str = jsc.ZigString.fromUTF8(strings.withoutUTF8BOM(input));
|
||||
return jsc.DOMFormData.createFromURLQuery(globalThis, &str);
|
||||
const result = jsc.DOMFormData.createFromURLQuery(globalThis, &str);
|
||||
// Check if an exception was thrown (e.g., string too long)
|
||||
if (result == .zero) {
|
||||
return error.JSError;
|
||||
}
|
||||
return result;
|
||||
},
|
||||
.Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary),
|
||||
}
|
||||
@@ -1041,7 +1046,11 @@ pub const FormData = struct {
|
||||
return globalThis.throwInvalidArguments("input must be a string or ArrayBufferView", .{});
|
||||
}
|
||||
|
||||
return FormData.toJS(globalThis, input, encoding) catch |err| return globalThis.throwError(err, "while parsing FormData");
|
||||
return FormData.toJS(globalThis, input, encoding) catch |err| {
|
||||
if (err == error.JSError) return error.JSError;
|
||||
if (err == error.JSTerminated) return error.JSTerminated;
|
||||
return globalThis.throwError(err, "while parsing FormData");
|
||||
};
|
||||
}
|
||||
|
||||
comptime {
|
||||
|
||||
@@ -277,6 +277,23 @@ describe("FormData", () => {
|
||||
expect(fd.toJSON()).toEqual({ "1": "1" });
|
||||
});
|
||||
|
||||
test("FormData.from throws on very large input instead of crashing", () => {
|
||||
// This test verifies that FormData.from throws an exception instead of crashing
|
||||
// when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB).
|
||||
// We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+.
|
||||
const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing");
|
||||
// Set a small limit so we can test the boundary without allocating gigabytes
|
||||
const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit
|
||||
try {
|
||||
// Create a buffer larger than the limit
|
||||
const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB
|
||||
// @ts-expect-error - FormData.from is a Bun extension
|
||||
expect(() => FormData.from(largeBuffer)).toThrow("Cannot create a string longer than");
|
||||
} finally {
|
||||
setSyntheticAllocationLimitForTesting(originalLimit);
|
||||
}
|
||||
});
|
||||
|
||||
it("should throw on bad boundary", async () => {
|
||||
const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', {
|
||||
headers: {
|
||||
|
||||
Reference in New Issue
Block a user