diff --git a/src/bun.js/bindings/bindings.cpp b/src/bun.js/bindings/bindings.cpp index e7a072faaa..45c0048f98 100644 --- a/src/bun.js/bindings/bindings.cpp +++ b/src/bun.js/bindings/bindings.cpp @@ -5677,7 +5677,15 @@ CPP_DECL JSC::EncodedJSValue WebCore__DOMFormData__createFromURLQuery(JSC::JSGlo { Zig::GlobalObject* globalObject = static_cast(arg0); // don't need to copy the string because it internally does. - auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), toString(*arg1)); + auto str = toString(*arg1); + // toString() in helpers.h returns an empty string when the input exceeds + // String::MaxLength or Bun's synthetic allocation limit. This is the only + // condition under which toString() returns empty for non-empty input. + if (str.isEmpty() && arg1->len > 0) { + auto scope = DECLARE_THROW_SCOPE(globalObject->vm()); + return Bun::ERR::STRING_TOO_LONG(scope, globalObject); + } + auto formData = DOMFormData::create(globalObject->scriptExecutionContext(), WTFMove(str)); return JSValue::encode(toJSNewlyCreated(arg0, globalObject, WTFMove(formData))); } diff --git a/src/bun.js/bindings/helpers.h b/src/bun.js/bindings/helpers.h index 1fc73077a9..8c1a40dd1e 100644 --- a/src/bun.js/bindings/helpers.h +++ b/src/bun.js/bindings/helpers.h @@ -2,6 +2,7 @@ #include "root.h" #include "wtf/text/ASCIILiteral.h" +#include "wtf/SIMDUTF.h" #include #include @@ -79,12 +80,24 @@ static const WTF::String toString(ZigString str) } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { ASSERT_WITH_MESSAGE(!isTaggedExternalPtr(str.ptr), "UTF8 and external ptr are mutually exclusive. The external will never be freed."); + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + // For valid UTF-8, the number of UTF-16 code units is <= the number of UTF-8 bytes + // (ASCII is 1:1; other code points use multiple UTF-8 bytes per UTF-16 code unit). + // We only need to compute the actual UTF-16 length when the byte length exceeds the limit. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + // UTF-8 byte length != UTF-16 length, so use simdutf to calculate the actual UTF-16 length. + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); } if (isTaggedExternalPtr(str.ptr)) [[unlikely]] { // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] { free_global_string(nullptr, reinterpret_cast(const_cast(untag(str.ptr))), static_cast(str.len)); return {}; } @@ -95,7 +108,7 @@ static const WTF::String toString(ZigString str) } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (str.len > Bun__stringSyntheticAllocationLimit || str.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -121,11 +134,19 @@ static const WTF::String toString(ZigString str, StringPointer ptr) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (ptr.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(&untag(str.ptr)[ptr.off]), ptr.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len }); } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -141,11 +162,19 @@ static const WTF::String toStringCopy(ZigString str, StringPointer ptr) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (ptr.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(&untag(str.ptr)[ptr.off]), ptr.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { &untag(str.ptr)[ptr.off], ptr.len }); } // This will fail if the string is too long. Let's make it explicit instead of an ASSERT. - if (str.len > Bun__stringSyntheticAllocationLimit) [[unlikely]] { + if (ptr.len > Bun__stringSyntheticAllocationLimit || ptr.len > WTF::String::MaxLength) [[unlikely]] { return {}; } @@ -161,6 +190,14 @@ static const WTF::String toStringCopy(ZigString str) return WTF::String(); } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return {}; + } + } return WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); } @@ -188,6 +225,14 @@ static void appendToBuilder(ZigString str, WTF::StringBuilder& builder) return; } if (isTaggedUTF8Ptr(str.ptr)) [[unlikely]] { + // Check if the resulting UTF-16 string could possibly exceed the maximum length. + size_t maxLength = std::min(Bun__stringSyntheticAllocationLimit, static_cast(WTF::String::MaxLength)); + if (str.len > maxLength) [[unlikely]] { + size_t utf16Length = simdutf::utf16_length_from_utf8(reinterpret_cast(untag(str.ptr)), str.len); + if (utf16Length > maxLength) { + return; + } + } WTF::String converted = WTF::String::fromUTF8ReplacingInvalidSequences(std::span { untag(str.ptr), str.len }); builder.append(converted); return; diff --git a/src/url.zig b/src/url.zig index 945737ac09..a4ebd9242a 100644 --- a/src/url.zig +++ b/src/url.zig @@ -984,7 +984,12 @@ pub const FormData = struct { switch (encoding) { .URLEncoded => { var str = jsc.ZigString.fromUTF8(strings.withoutUTF8BOM(input)); - return jsc.DOMFormData.createFromURLQuery(globalThis, &str); + const result = jsc.DOMFormData.createFromURLQuery(globalThis, &str); + // Check if an exception was thrown (e.g., string too long) + if (result == .zero) { + return error.JSError; + } + return result; }, .Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary), } @@ -1041,7 +1046,11 @@ pub const FormData = struct { return globalThis.throwInvalidArguments("input must be a string or ArrayBufferView", .{}); } - return FormData.toJS(globalThis, input, encoding) catch |err| return globalThis.throwError(err, "while parsing FormData"); + return FormData.toJS(globalThis, input, encoding) catch |err| { + if (err == error.JSError) return error.JSError; + if (err == error.JSTerminated) return error.JSTerminated; + return globalThis.throwError(err, "while parsing FormData"); + }; } comptime { diff --git a/test/js/web/html/FormData.test.ts b/test/js/web/html/FormData.test.ts index d81dd74345..61f3be453c 100644 --- a/test/js/web/html/FormData.test.ts +++ b/test/js/web/html/FormData.test.ts @@ -277,6 +277,23 @@ describe("FormData", () => { expect(fd.toJSON()).toEqual({ "1": "1" }); }); + test("FormData.from throws on very large input instead of crashing", () => { + // This test verifies that FormData.from throws an exception instead of crashing + // when given input larger than WebKit's String::MaxLength (INT32_MAX ~= 2GB). + // We use a smaller test case with the synthetic limit to avoid actually allocating 2GB+. + const { setSyntheticAllocationLimitForTesting } = require("bun:internal-for-testing"); + // Set a small limit so we can test the boundary without allocating gigabytes + const originalLimit = setSyntheticAllocationLimitForTesting(1024 * 1024); // 1MB limit + try { + // Create a buffer larger than the limit + const largeBuffer = new Uint8Array(2 * 1024 * 1024); // 2MB + // @ts-expect-error - FormData.from is a Bun extension + expect(() => FormData.from(largeBuffer)).toThrow("Cannot create a string longer than"); + } finally { + setSyntheticAllocationLimitForTesting(originalLimit); + } + }); + it("should throw on bad boundary", async () => { const response = new Response('foo\r\nContent-Disposition: form-data; name="foo"\r\n\r\nbar\r\n', { headers: {