From 43c46b1f7747917af7b545df6085b8d5ab7f1a85 Mon Sep 17 00:00:00 2001 From: robobun Date: Wed, 26 Nov 2025 13:46:08 -0800 Subject: [PATCH] fix(FormData): throw error instead of assertion failure on very large input (#25006) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Fix crash in `FormData.from()` when called with very large ArrayBuffer input - Add length check in C++ `toString` function against both Bun's synthetic limit and WebKit's `String::MaxLength` - For UTF-8 tagged strings, use simdutf to calculate actual UTF-16 length only when byte length exceeds the limit ## Root Cause When `FormData.from()` was called with a very large ArrayBuffer (e.g., `new Uint32Array(913148244)` = ~3.6GB), the code would crash with: ``` ASSERTION FAILED: data.size() <= MaxLength vendor/WebKit/Source/WTF/wtf/text/StringImpl.h(886) ``` The `toString()` function in `helpers.h` was only checking against `Bun__stringSyntheticAllocationLimit` (which defaults to ~4GB), but not against WebKit's `String::MaxLength` (INT32_MAX, ~2GB). When the input exceeded `String::MaxLength`, `createWithoutCopying()` would fail with an assertion. ## Changes 1. **helpers.h**: Added `|| str.len > WTF::String::MaxLength` checks to all three code paths in `toString()`: - UTF-8 tagged pointer path (with simdutf length calculation only when needed) - External pointer path - Non-copying creation path 2. **url.zig**: Reverted the incorrect Zig-side check (UTF-8 byte length != UTF-16 character length) ## Test plan - [x] Added test that verifies FormData.from with oversized input doesn't crash - [x] Verified original crash case now returns empty FormData instead of crashing: ```js const v3 = new Uint32Array(913148244); FormData.from(v3); // No longer crashes ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Bot Co-authored-by: Claude Co-authored-by: Jarred Sumner --- src/bun.js/bindings/bindings.cpp | 10 +++++- src/bun.js/bindings/helpers.h | 53 ++++++++++++++++++++++++++++--- src/url.zig | 13 ++++++-- test/js/web/html/FormData.test.ts | 17 ++++++++++ 4 files changed, 86 insertions(+), 7 deletions(-) diff --git a/src/bun.js/bindings/bindings.cpp b/src/bun.js/bindings/bindings.cpp index e7a072faaa..45c0048f98 100644 --- a/src/bun.js/bindings/bindings.cpp +++ b/src/bun.js/bindings/bindings.cpp @@ -5677,7 +5677,15 @@ CPP_DECL JSC::EncodedJSValue WebCore__DOMFormData__createFromURLQuery(JSC::JSGlo { Zig::GlobalObject* globalObject = static_cast(arg0); // don't need to copy the string because it internally does. - auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), toString(*arg1)); + auto str = toString(*arg1); + // toString() in helpers.h returns an empty string when the input exceeds + // String::MaxLength or Bun's synthetic allocation limit. This is the only + // condition under which toString() returns empty for non-empty input. + if (str.isEmpty() && arg1->len > 0) { + auto scope = DECLARE_THROW_SCOPE(globalObject->vm()); + return Bun::ERR::STRING_TOO_LONG(scope, globalObject); + } + auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), WTFMove(str)); return JSValue::encode(toJSNewlyCreated(arg0, globalObject, WTFMove(formData))); } diff --git a/src/bun.js/bindings/helpers.h b/src/bun.js/bindings/helpers.h index 1fc73077a9..8c1a40dd1e 100644 --- a/src/bun.js/bindings/helpers.h +++ b/src/bun.js/bindings/helpers.h @@ -2,6 +2,7 @@ #include "root.h" #include "wtf/text/ASCIILiteral.h" +#include "wtf/SIMDUTF.h" #include #include @@ -79,12 +80,24 @@ static const WTF::String toString(ZigString str) } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed."); + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + // For valid UTF-8, the number of UTF-16 code units is <= the number of UTF-8 bytes + // (ASCII is 1:1; other code points use multiple UTF-8 bytes per UTF-16 code unit). + // We only need to compute the actual UTF-16 length when the byte length exceeds the limit. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + // UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length. + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); } if (isTaggedExternalPtr(str.ptr)) [[unlikely]] { // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] { free_global_string(nullptr, reinterpret_cast(const_cast(untag(str.ptr))), static_cast(str.len)); return {}; } @@ -95,7 +108,7 @@ static const WTF::String toString(ZigString str) } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -121,11 +134,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (ptr.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(&untag(str.ptr)[ptr.off]), ptr.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len }); } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -141,11 +162,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (ptr.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(&untag(str.ptr)[ptr.off]), ptr.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len }); } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -161,6 +190,14 @@ static const WTF::String toStringCopy(ZigString str) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); } @@ -188,6 +225,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder) return; } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return; + } + } WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); builder.append(converted); return; diff --git a/src/url.zig b/src/url.zig index 945737ac09..a4ebd9242a 100644 --- a/src/url.zig +++ b/src/url.zig @@ -984,7 +984,12 @@ pub const FormData = struct { switch (encoding) { .URLEncoded => { var str = jsc.ZigString.fromUTF8(strings.withoutUTF8BOM(input)); - return jsc.DOMFormData.createFromURLQuery(globalThis, &str); + const result = jsc.DOMFormData.createFromURLQuery(globalThis, &str); + // Check if an exception was thrown (e.g., string too long) + if (result == .zero) { + return error.JSError; + } + return result; }, .Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary), } @@ -1041,7 +1046,11 @@ pub const FormData = struct { return globalThis.throwInvalidArguments("input must be a string or ArrayBufferView", .{}); } - return FormData.toJS(globalThis, input, encoding) catch |err| return globalThis.throwError(err, "while parsing FormData"); + return FormData.toJS(globalThis, input, encoding) catch |err| { + if (err == error.JSError) return error.JSError; + if (err == error.JSTerminated) return error.JSTerminated; + return globalThis.throwError(err, "while parsing FormData"); + }; } comptime { diff --git a/test/js/web/html/FormData.test.ts b/test/js/web/html/FormData.test.ts index d81dd74345..61f3be453c 100644 --- a/test/js/web/html/FormData.test.ts +++ b/test/js/web/html/FormData.test.ts @@ -277,6 +277,23 @@ describe("FormData", () => { expect(fd.toJSON()).toEqual({ "1": "1" }); }); + test("FormData.from throws on very large input instead of crashing", () => { + // This test verifies that FormData.from throws an exception instead of crashing + // when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB). + // We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+. + const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing"); + // Set a small limit so we can test the boundary without allocating gigabytes + const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit + try { + // Create a buffer larger than the limit + const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB + // @ts-expect-error - FormData.from is a Bun extension + expect(() => FormData.from(largeBuffer)).toThrow("Cannot create a string longer than"); + } finally { + setSyntheticAllocationLimitForTesting(originalLimit); + } + }); + it("should throw on bad boundary", async () => { const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', { headers: {