mirror of
https://github.com/oven-sh/bun
synced 2026-02-25 19:17:20 +01:00
## Summary

- Update LLVM version references across build scripts, Dockerfiles, CI, Nix configs, and documentation
- Fix LLVM 21 `-Wcharacter-conversion` errors in WebKit bindings:
  - `EncodingTables.h`: pragma for intentional char32_t/char16_t comparisons
  - `TextCodecCJK.cpp`: widen `gb18030AsymmetricEncode` param to char32_t
  - `URLPatternParser`: widen `isValidNameCodepoint` param to char32_t, cast for `startsWith`
- Fix `__libcpp_verbose_abort` noexcept mismatch (LLVM 21 uses `_NOEXCEPT`)
- Fix dangling pointer in `BunJSCModule.h` (`toCString` temporary lifetime)
- Remove `useMathSumPreciseMethod` (removed upstream in JSC)

**Before merging:** Merge https://github.com/oven-sh/WebKit/pull/153 first, then update `WEBKIT_VERSION` in `cmake/tools/SetupWebKit.cmake` to point to the merged commit.

## Test plan

- [ ] Build bun debug on macOS with LLVM 21
- [ ] Build bun on Linux (glibc)
- [ ] Build bun on Linux (musl)
- [ ] Build bun on Windows
- [ ] Run test suite

Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
102 lines
4.1 KiB
C++
102 lines
4.1 KiB
C++
#include "EncodeURIComponent.h"
|
|
|
|
// from JSGlobalObjectFunctions.cpp
|
|
|
|
namespace JSC {
|
|
|
|
template<typename CharacterType>
|
|
static WebCore::ExceptionOr<void> encode(VM& vm, const WTF::BitSet<256>& doNotEscape, std::span<const CharacterType> characters, StringBuilder& builder)
|
|
{
|
|
auto scope = DECLARE_THROW_SCOPE(vm);
|
|
|
|
// 18.2.6.1.1 Runtime Semantics: Encode ( string, unescapedSet )
|
|
// https://tc39.github.io/ecma262/#sec-encode
|
|
|
|
auto throwException = [] {
|
|
return WebCore::ExceptionOr<void>(WebCore::Exception { WebCore::EncodingError, "String contained an illegal UTF-16 sequence."_s });
|
|
};
|
|
|
|
builder.reserveCapacity(characters.size());
|
|
|
|
// 4. Repeat
|
|
auto* end = characters.data() + characters.size();
|
|
for (auto* cursor = characters.data(); cursor != end; ++cursor) {
|
|
auto character = *cursor;
|
|
|
|
// 4-c. If C is in unescapedSet, then
|
|
if (character < doNotEscape.size() && doNotEscape.get(character)) {
|
|
// 4-c-i. Let S be a String containing only the code unit C.
|
|
// 4-c-ii. Let R be a new String value computed by concatenating the previous value of R and S.
|
|
builder.append(static_cast<Latin1Character>(character));
|
|
continue;
|
|
}
|
|
|
|
// 4-d-i. If the code unit value of C is not less than 0xDC00 and not greater than 0xDFFF, throw a URIError exception.
|
|
if (U16_IS_TRAIL(character))
|
|
return throwException();
|
|
|
|
// 4-d-ii. If the code unit value of C is less than 0xD800 or greater than 0xDBFF, then
|
|
// 4-d-ii-1. Let V be the code unit value of C.
|
|
char32_t codePoint;
|
|
if (!U16_IS_LEAD(character))
|
|
codePoint = static_cast<char32_t>(character);
|
|
else {
|
|
// 4-d-iii. Else,
|
|
// 4-d-iii-1. Increase k by 1.
|
|
++cursor;
|
|
|
|
// 4-d-iii-2. If k equals strLen, throw a URIError exception.
|
|
if (cursor == end)
|
|
return throwException();
|
|
|
|
// 4-d-iii-3. Let kChar be the code unit value of the code unit at index k within string.
|
|
auto trail = *cursor;
|
|
|
|
// 4-d-iii-4. If kChar is less than 0xDC00 or greater than 0xDFFF, throw a URIError exception.
|
|
if (!U16_IS_TRAIL(trail))
|
|
return throwException();
|
|
|
|
// 4-d-iii-5. Let V be UTF16Decode(C, kChar).
|
|
codePoint = U16_GET_SUPPLEMENTARY(character, trail);
|
|
}
|
|
|
|
// 4-d-iv. Let Octets be the array of octets resulting by applying the UTF-8 transformation to V, and let L be the array size.
|
|
Latin1Character utf8OctetsBuffer[U8_MAX_LENGTH];
|
|
unsigned utf8Length = 0;
|
|
// We can use U8_APPEND_UNSAFE here since codePoint is either
|
|
// 1. non surrogate one, correct code point.
|
|
// 2. correct code point generated from validated lead and trail surrogates.
|
|
U8_APPEND_UNSAFE(utf8OctetsBuffer, utf8Length, codePoint);
|
|
|
|
// 4-d-v. Let j be 0.
|
|
// 4-d-vi. Repeat, while j < L
|
|
for (unsigned index = 0; index < utf8Length; ++index) {
|
|
// 4-d-vi-1. Let jOctet be the value at index j within Octets.
|
|
// 4-d-vi-2. Let S be a String containing three code units "%XY" where XY are two uppercase hexadecimal digits encoding the value of jOctet.
|
|
// 4-d-vi-3. Let R be a new String value computed by concatenating the previous value of R and S.
|
|
builder.append('%');
|
|
builder.append(hex(utf8OctetsBuffer[index], 2));
|
|
}
|
|
}
|
|
return {};
|
|
}
|
|
|
|
// Dispatches to the templated encoder using the character width of `view`:
// the 8-bit span when `view.is8Bit()` is true, otherwise the 16-bit span.
// The result of the selected encoder is returned unchanged.
static WebCore::ExceptionOr<void> encode(VM& vm, WTF::StringView view, const WTF::BitSet<256>& doNotEscape, StringBuilder& builder)
{
    return view.is8Bit()
        ? encode(vm, doNotEscape, view.span8(), builder)
        : encode(vm, doNotEscape, view.span16(), builder);
}
|
|
|
|
// Appends the percent-encoded form of `source` to `builder`.
//
// ASCII letters, decimal digits and the characters ! ' ( ) * - . _ ~ are copied through
// unchanged; everything else is handled by encode(), whose result (success or exception)
// is returned as-is.
WebCore::ExceptionOr<void> encodeURIComponent(VM& vm, WTF::StringView source, StringBuilder& builder)
{
    // Characters that are never escaped, built once as a compile-time bit set.
    static constexpr auto unescapedCharacters = makeLatin1CharacterBitSet(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "!'()*-._~");

    return encode(vm, source, unescapedCharacters, builder);
}
|
|
|
|
}
|