Files
bun.sh/src/bun.js/bindings/EncodeURIComponent.cpp
Dylan Conway 41de7a3bfb [publish images] Upgrade LLVM toolchain from 19.1.7 to 21.1.8 (#26667)
## Summary
- Update LLVM version references across build scripts, Dockerfiles, CI,
Nix configs, and documentation
- Fix LLVM 21 `-Wcharacter-conversion` errors in WebKit bindings:
  - `EncodingTables.h`: pragma for intentional char32_t/char16_t
    comparisons
  - `TextCodecCJK.cpp`: widen `gb18030AsymmetricEncode` param to char32_t
  - `URLPatternParser`: widen `isValidNameCodepoint` param to char32_t,
    cast for `startsWith`
- Fix `__libcpp_verbose_abort` noexcept mismatch (LLVM 21 uses
`_NOEXCEPT`)
- Fix dangling pointer in `BunJSCModule.h` (`toCString` temporary
lifetime)
- Remove `useMathSumPreciseMethod` (removed upstream in JSC)

**Before merging:** Merge https://github.com/oven-sh/WebKit/pull/153
first, then update `WEBKIT_VERSION` in `cmake/tools/SetupWebKit.cmake`
to point to the merged commit.

## Test plan
- [ ] Build bun debug on macOS with LLVM 21
- [ ] Build bun on Linux (glibc)
- [ ] Build bun on Linux (musl)
- [ ] Build bun on Windows
- [ ] Run test suite

Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-02-02 23:12:21 -08:00

102 lines
4.1 KiB
C++

#include "EncodeURIComponent.h"
// from JSGlobalObjectFunctions.cpp
namespace JSC {
// Percent-encodes `characters` and appends the result to `builder`, following
// ECMA-262 "Encode ( string, unescapedSet )":
// https://tc39.github.io/ecma262/#sec-encode
//
// Code units flagged in `doNotEscape` are appended unchanged. Anything else is
// decoded to a code point (pairing a lead surrogate with the trail that follows
// it), converted to UTF-8, and emitted as one "%XY" triple per octet.
//
// Returns an EncodingError exception when a trail surrogate appears on its own,
// or when a lead surrogate is not immediately followed by a trail surrogate.
// Output appended before the failure was detected stays in `builder`.
template<typename CharacterType>
static WebCore::ExceptionOr<void> encode(VM& vm, const WTF::BitSet<256>& doNotEscape, std::span<const CharacterType> characters, StringBuilder& builder)
{
    // NOTE(review): the scope is declared but never referenced again in this function.
    auto scope = DECLARE_THROW_SCOPE(vm);

    // Shared failure result for every malformed-surrogate case below.
    auto illegalSequence = [] {
        return WebCore::ExceptionOr<void>(WebCore::Exception { WebCore::EncodingError, "String contained an illegal UTF-16 sequence."_s });
    };

    const size_t length = characters.size();
    builder.reserveCapacity(length);

    // Step 4: walk the input; `k` mirrors the spec's index of the same name.
    for (size_t k = 0; k < length; ++k) {
        const auto unit = characters[k];

        // Step 4-c: code units in the unescaped set are copied through verbatim.
        // The range check keeps the lookup inside the 256-entry bit set.
        if (unit < doNotEscape.size() && doNotEscape.get(unit)) {
            builder.append(static_cast<Latin1Character>(unit));
            continue;
        }

        // Step 4-d-i: a trail surrogate with no preceding lead is an error.
        if (U16_IS_TRAIL(unit))
            return illegalSequence();

        // Step 4-d-ii: a non-surrogate code unit is its own code point.
        char32_t codePoint = static_cast<char32_t>(unit);
        if (U16_IS_LEAD(unit)) {
            // Step 4-d-iii: consume the next code unit, which must exist and
            // must be a trail surrogate, then combine the pair.
            ++k;
            if (k == length)
                return illegalSequence();
            const auto trail = characters[k];
            if (!U16_IS_TRAIL(trail))
                return illegalSequence();
            codePoint = U16_GET_SUPPLEMENTARY(unit, trail);
        }

        // Step 4-d-iv: UTF-8 encode the code point. The unchecked append is
        // acceptable because `codePoint` is either a non-surrogate code unit or
        // was built from a lead/trail pair validated just above.
        Latin1Character octets[U8_MAX_LENGTH];
        unsigned octetCount = 0;
        U8_APPEND_UNSAFE(octets, octetCount, codePoint);

        // Steps 4-d-v / 4-d-vi: append "%XY" for each octet.
        for (unsigned j = 0; j < octetCount; ++j) {
            builder.append('%');
            builder.append(hex(octets[j], 2));
        }
    }

    return {};
}
// Picks the character width of `view` and forwards to the matching
// instantiation of the templated encode() above: span16() for 16-bit views,
// span8() for 8-bit ones. The result of that call is returned unchanged.
static WebCore::ExceptionOr<void> encode(VM& vm, WTF::StringView view, const WTF::BitSet<256>& doNotEscape, StringBuilder& builder)
{
    if (!view.is8Bit())
        return encode(vm, doNotEscape, view.span16(), builder);

    return encode(vm, doNotEscape, view.span8(), builder);
}
// Appends the percent-encoded form of `source` to `builder`.
//
// ASCII letters, decimal digits and the characters !'()*-._~ are appended
// as-is; every other character is escaped by encode(). Whatever encode()
// returns (success or an exception) is passed straight back to the caller.
WebCore::ExceptionOr<void> encodeURIComponent(VM& vm, WTF::StringView source, StringBuilder& builder)
{
    // Built once at compile time: the set of characters that are never escaped.
    static constexpr auto unescapedCharacters = makeLatin1CharacterBitSet(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "!'()*-._~");

    return encode(vm, source, unescapedCharacters, builder);
}
}