mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
perf(buffer): optimize swap16/swap64 with __builtin_bswap (#26190)
## Summary

Optimize `Buffer.swap16()` and `Buffer.swap64()` by replacing byte-by-byte swapping loops with `__builtin_bswap16/64` compiler intrinsics.

## Problem

`Buffer.swap16` and `Buffer.swap64` were significantly slower than Node.js due to inefficient byte-level operations:

- **swap16**: Swapped bytes one at a time in a loop
- **swap64**: Used a nested loop with 4 byte swaps per 8-byte element

## Solution

Replace the manual byte swapping with `__builtin_bswap16/64` intrinsics, which compile to single CPU instructions (`BSWAP` on x86, `REV` on ARM). Use `memcpy` for loading/storing values to handle potentially unaligned buffers safely.

## Benchmark Results (64KB buffer, Apple M4 Max)

| Operation | Bun 1.3.6 | Node.js 24 | This PR | Improvement |
|-----------|-----------|------------|---------|-------------|
| swap16 | 1.00 µs | 0.57 µs | 0.56 µs | **1.79x faster** |
| swap32 | 0.55 µs | 0.77 µs | 0.54 µs | (no change, already fast) |
| swap64 | 2.02 µs | 0.58 µs | 0.56 µs | **3.6x faster** |

Bun now matches or exceeds Node.js performance for all swap operations.

## Notes

- `swap32` was not modified as the compiler already optimizes the 4-byte swap pattern
- All existing tests pass
This commit is contained in:
@@ -1649,8 +1649,8 @@ static JSC::EncodedJSValue jsBufferPrototypeFunction_swap16Body(JSC::JSGlobalObj
|
||||
auto& vm = JSC::getVM(lexicalGlobalObject);
|
||||
auto scope = DECLARE_THROW_SCOPE(vm);
|
||||
|
||||
constexpr int elemSize = 2;
|
||||
int64_t length = static_cast<int64_t>(castedThis->byteLength());
|
||||
constexpr size_t elemSize = 2;
|
||||
size_t length = castedThis->byteLength();
|
||||
if (length % elemSize != 0) {
|
||||
throwNodeRangeError(lexicalGlobalObject, scope, "Buffer size must be a multiple of 16-bits"_s);
|
||||
return {};
|
||||
@@ -1661,14 +1661,14 @@ static JSC::EncodedJSValue jsBufferPrototypeFunction_swap16Body(JSC::JSGlobalObj
|
||||
return {};
|
||||
}
|
||||
|
||||
uint8_t* typedVector = castedThis->typedVector();
|
||||
uint8_t* data = castedThis->typedVector();
|
||||
size_t count = length / elemSize;
|
||||
|
||||
for (size_t elem = 0; elem < length; elem += elemSize) {
|
||||
const size_t right = elem + 1;
|
||||
|
||||
uint8_t temp = typedVector[elem];
|
||||
typedVector[elem] = typedVector[right];
|
||||
typedVector[right] = temp;
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
uint16_t val;
|
||||
memcpy(&val, data + i * elemSize, sizeof(val));
|
||||
val = __builtin_bswap16(val);
|
||||
memcpy(data + i * elemSize, &val, sizeof(val));
|
||||
}
|
||||
|
||||
return JSC::JSValue::encode(castedThis);
|
||||
@@ -1715,7 +1715,7 @@ static JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGlobalObj
|
||||
auto scope = DECLARE_THROW_SCOPE(vm);
|
||||
|
||||
constexpr size_t elemSize = 8;
|
||||
int64_t length = static_cast<int64_t>(castedThis->byteLength());
|
||||
size_t length = castedThis->byteLength();
|
||||
if (length % elemSize != 0) {
|
||||
throwNodeRangeError(lexicalGlobalObject, scope, "Buffer size must be a multiple of 64-bits"_s);
|
||||
return {};
|
||||
@@ -1726,19 +1726,14 @@ static JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGlobalObj
|
||||
return {};
|
||||
}
|
||||
|
||||
uint8_t* typedVector = castedThis->typedVector();
|
||||
uint8_t* data = castedThis->typedVector();
|
||||
size_t count = length / elemSize;
|
||||
|
||||
constexpr size_t swaps = elemSize / 2;
|
||||
for (size_t elem = 0; elem < length; elem += elemSize) {
|
||||
const size_t right = elem + elemSize - 1;
|
||||
for (size_t k = 0; k < swaps; k++) {
|
||||
const size_t i = right - k;
|
||||
const size_t j = elem + k;
|
||||
|
||||
uint8_t temp = typedVector[i];
|
||||
typedVector[i] = typedVector[j];
|
||||
typedVector[j] = temp;
|
||||
}
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
uint64_t val;
|
||||
memcpy(&val, data + i * elemSize, sizeof(val));
|
||||
val = __builtin_bswap64(val);
|
||||
memcpy(data + i * elemSize, &val, sizeof(val));
|
||||
}
|
||||
|
||||
return JSC::JSValue::encode(castedThis);
|
||||
|
||||
Reference in New Issue
Block a user