feat: add Bun.indexOfFirstDifference() for SIMD-accelerated TypedArray comparison

Adds a new `Bun.indexOfFirstDifference(a, b)` function that returns the index
of the first element where `a[i] !== b[i]`, comparing up to `min(a.length, b.length)`
elements. Returns the minimum length if the compared prefix is identical.

- Uses Google Highway SIMD library for fast byte-level comparison
- Converts byte offsets back to element indices for multi-byte TypedArrays
- Supports all integer TypedArray types (Uint8, Int8, Uint8Clamped, Uint16,
  Int16, Uint32, Int32, BigInt64, BigUint64)
- Rejects float arrays (Float16/32/64) since NaN !== NaN and NaN has
  multiple bit representations
- Validates same-type requirement and handles detached buffers
This commit is contained in:
Jarred Sumner
2026-01-26 20:19:09 -08:00
parent ba426210c2
commit 8ed853c8d1
4 changed files with 452 additions and 0 deletions

View File

@@ -7092,6 +7092,17 @@ declare module "bun" {
*/
function indexOfLine(buffer: ArrayBufferView | ArrayBufferLike, offset?: number): number;
/**
 * Find the index of the first element at which two typed arrays differ.
 *
 * Returns the index of the first element where `a[i] !== b[i]`,
 * comparing up to `min(a.length, b.length)` elements.
 * Returns that minimum length if the compared prefix is identical.
 * Uses SIMD acceleration for fast comparison.
 *
 * Both arrays must be the same TypedArray type, otherwise a TypeError is thrown.
 * Float typed arrays (Float16Array, Float32Array, Float64Array) are not supported.
 *
 * @param a The first typed array to compare
 * @param b The second typed array to compare; must be the same TypedArray type as `a`
 * @returns The element index (not the byte offset) of the first difference,
 * or `min(a.length, b.length)` when no difference is found in the compared prefix
 * @throws {TypeError} If fewer than two arguments are passed, if either argument
 * is not a TypedArray, if the two TypedArray types differ, if the arrays are
 * float typed arrays, or if either array's buffer is detached
 *
 * @example
 * ```ts
 * Bun.indexOfFirstDifference(new Uint8Array([1, 2, 3]), new Uint8Array([1, 2, 9])); // 2
 * Bun.indexOfFirstDifference(new Uint8Array([1, 2, 3, 4, 5]), new Uint8Array([1, 2, 3])); // 3
 * ```
 */
function indexOfFirstDifference(a: NodeJS.TypedArray, b: NodeJS.TypedArray): number;
interface GlobScanOptions {
/**
* The root directory to start matching from. Defaults to `process.cwd()`

View File

@@ -755,6 +755,75 @@ JSC_DEFINE_HOST_FUNCTION(functionBunDeepMatch, (JSGlobalObject * globalObject, J
return JSValue::encode(jsBoolean(match));
}
// SIMD byte comparison provided by the Highway bindings: returns the byte offset of
// the first mismatch between the two buffers, or the given length if they match.
extern "C" size_t highway_index_of_first_difference(const uint8_t*, const uint8_t*, size_t);
// Forward declaration of the host function exposed as Bun.indexOfFirstDifference.
JSC_DECLARE_HOST_FUNCTION(functionBunIndexOfFirstDifference);
// Host function behind `Bun.indexOfFirstDifference(a, b)`.
//
// Arguments: two typed arrays of the same, non-float kind whose buffers are
// not detached. Any violation throws a TypeError and returns an empty value.
//
// Result: the element index of the first differing element within the first
// min(byteLength(a), byteLength(b)) bytes, or the number of elements in that
// range when no difference is found.
JSC_DEFINE_HOST_FUNCTION(functionBunIndexOfFirstDifference, (JSGlobalObject * globalObject, JSC::CallFrame* callFrame))
{
    auto& vm = JSC::getVM(globalObject);
    auto scope = DECLARE_THROW_SCOPE(vm);

    if (callFrame->argumentCount() < 2) {
        throwTypeError(globalObject, scope, "indexOfFirstDifference requires 2 arguments"_s);
        return {};
    }

    auto lhs = callFrame->uncheckedArgument(0);
    auto rhs = callFrame->uncheckedArgument(1);

    // Validate the first argument before looking at the second so the error
    // always names the leftmost offending argument.
    const bool lhsIsTypedArray = lhs.isCell() && isTypedArrayType(lhs.asCell()->type());
    if (!lhsIsTypedArray) {
        throwTypeError(globalObject, scope, "First argument must be a TypedArray"_s);
        return {};
    }

    const bool rhsIsTypedArray = rhs.isCell() && isTypedArrayType(rhs.asCell()->type());
    if (!rhsIsTypedArray) {
        throwTypeError(globalObject, scope, "Second argument must be a TypedArray"_s);
        return {};
    }

    auto lhsType = lhs.asCell()->type();
    auto rhsType = rhs.asCell()->type();

    // The byte offset is converted to an element index with a single element
    // size below, so both views have to be of the same kind.
    if (lhsType != rhsType) {
        throwTypeError(globalObject, scope, "Both arguments must be the same TypedArray type"_s);
        return {};
    }

    // Float arrays are refused: NaN has multiple bit patterns and NaN !== NaN,
    // so comparing raw bytes would not match `a[i] !== b[i]`.
    const bool isFloatKind = lhsType == JSC::Float16ArrayType
        || lhsType == JSC::Float32ArrayType
        || lhsType == JSC::Float64ArrayType;
    if (isFloatKind) {
        throwTypeError(globalObject, scope, "Float typed arrays are not supported"_s);
        return {};
    }

    auto* lhsView = jsCast<JSArrayBufferView*>(lhs.asCell());
    auto* rhsView = jsCast<JSArrayBufferView*>(rhs.asCell());

    if (lhsView->isDetached() || rhsView->isDetached()) {
        throwTypeError(globalObject, scope, "Cannot compare detached ArrayBuffers"_s);
        return {};
    }

    auto lhsBytes = lhsView->span();
    auto rhsBytes = rhsView->span();

    // Only the overlapping prefix of the two views is compared.
    size_t sharedByteLength = std::min(lhsBytes.size(), rhsBytes.size());
    size_t mismatchByteOffset = highway_index_of_first_difference(lhsBytes.data(), rhsBytes.data(), sharedByteLength);

    // The scan reports a byte offset; integer division maps any byte inside an
    // element back to that element's index.
    unsigned bytesPerElement = JSC::elementSize(JSC::typedArrayType(lhsType));
    size_t mismatchElement = mismatchByteOffset / bytesPerElement;

    return JSValue::encode(jsNumber(static_cast<double>(mismatchElement)));
}
JSC_DEFINE_HOST_FUNCTION(functionBunNanoseconds, (JSGlobalObject * globalObject, JSC::CallFrame* callFrame))
{
uint64_t time = Bun__readOriginTimer(bunVM(globalObject));
@@ -950,6 +1019,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj
gunzipSync BunObject_callback_gunzipSync DontDelete|Function 1
gzipSync BunObject_callback_gzipSync DontDelete|Function 1
hash BunObject_lazyPropCb_wrap_hash DontDelete|PropertyCallback
indexOfFirstDifference functionBunIndexOfFirstDifference DontDelete|Function 2
indexOfLine BunObject_callback_indexOfLine DontDelete|Function 1
inflateSync BunObject_callback_inflateSync DontDelete|Function 1
inspect BunObject_lazyPropCb_wrap_inspect DontDelete|PropertyCallback

View File

@@ -632,6 +632,32 @@ void FillWithSkipMaskImpl(const uint8_t* HWY_RESTRICT mask, size_t mask_len, uin
}
}
size_t IndexOfFirstDifferenceImpl(
const uint8_t* HWY_RESTRICT a,
const uint8_t* HWY_RESTRICT b,
size_t byte_len)
{
if (byte_len == 0) return 0;
D8 d;
const size_t N = hn::Lanes(d);
const size_t simd_len = byte_len - (byte_len % N);
size_t i = 0;
for (; i < simd_len; i += N) {
const auto va = hn::LoadU(d, a + i);
const auto vb = hn::LoadU(d, b + i);
const auto ne = hn::Ne(va, vb);
const intptr_t pos = hn::FindFirstTrue(d, ne);
if (pos >= 0) return i + pos;
}
// Scalar tail
for (; i < byte_len; ++i) {
if (a[i] != b[i]) return i;
}
return byte_len;
}
} // namespace HWY_NAMESPACE
} // namespace bun
HWY_AFTER_NAMESPACE();
@@ -655,6 +681,7 @@ HWY_EXPORT(IndexOfNeedsEscapeForJavaScriptStringImplQuote);
HWY_EXPORT(IndexOfNewlineOrNonASCIIImpl);
HWY_EXPORT(IndexOfNewlineOrNonASCIIOrHashOrAtImpl);
HWY_EXPORT(IndexOfSpaceOrNewlineOrNonASCIIImpl);
HWY_EXPORT(IndexOfFirstDifferenceImpl);
HWY_EXPORT(MemMemImpl);
HWY_EXPORT(ScanCharFrequencyImpl);
// Define the C-callable wrappers that use HWY_DYNAMIC_DISPATCH.
@@ -767,6 +794,11 @@ void highway_fill_with_skip_mask(
HWY_DYNAMIC_DISPATCH(FillWithSkipMaskImpl)(mask, mask_len, output, input, length, skip_mask);
}
// C-callable entry point: forwards to the IndexOfFirstDifferenceImpl variant
// chosen by HWY_DYNAMIC_DISPATCH and returns its result unchanged.
size_t highway_index_of_first_difference(const uint8_t* a, const uint8_t* b, size_t byte_len)
{
    const size_t first_mismatch = HWY_DYNAMIC_DISPATCH(IndexOfFirstDifferenceImpl)(a, b, byte_len);
    return first_mismatch;
}
} // extern "C"
} // namespace bun

View File

@@ -0,0 +1,339 @@
import { describe, expect, test } from "bun:test";
// Test suite for Bun.indexOfFirstDifference(a, b).
//
// Covers: basic results, every supported integer TypedArray kind, rejection of
// float arrays / mismatched kinds / detached buffers / bad arguments, arrays of
// different lengths, subarray views, lengths around SIMD vector boundaries, and
// conversion of the internal byte offset back to an element index.
describe("Bun.indexOfFirstDifference", () => {
  test("identical arrays return length", () => {
    const a = new Uint8Array([1, 2, 3, 4, 5]);
    const b = new Uint8Array([1, 2, 3, 4, 5]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(5);
  });

  test("difference at start returns 0", () => {
    const a = new Uint8Array([1, 2, 3]);
    const b = new Uint8Array([9, 2, 3]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
  });

  test("difference at end returns length - 1", () => {
    const a = new Uint8Array([1, 2, 3]);
    const b = new Uint8Array([1, 2, 9]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
  });

  test("difference in middle returns correct index", () => {
    const a = new Uint8Array([1, 2, 3, 4, 5]);
    const b = new Uint8Array([1, 2, 99, 4, 5]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
  });

  test("empty arrays return 0", () => {
    const a = new Uint8Array([]);
    const b = new Uint8Array([]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
  });

  describe("multiple TypedArray types", () => {
    test("Uint8Array", () => {
      const a = new Uint8Array([10, 20, 30]);
      const b = new Uint8Array([10, 20, 99]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Uint8ClampedArray", () => {
      const a = new Uint8ClampedArray([10, 20, 30]);
      const b = new Uint8ClampedArray([10, 20, 99]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Int8Array", () => {
      const a = new Int8Array([10, -20, 30]);
      const b = new Int8Array([10, -20, 99]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Uint16Array", () => {
      const a = new Uint16Array([1000, 2000, 3000]);
      const b = new Uint16Array([1000, 2000, 9999]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Int16Array", () => {
      const a = new Int16Array([1000, -2000, 3000]);
      const b = new Int16Array([1000, -2000, 9999]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Int32Array", () => {
      const a = new Int32Array([100000, 200000, 300000]);
      const b = new Int32Array([100000, 200000, 999999]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("Uint32Array", () => {
      const a = new Uint32Array([100000, 200000, 300000]);
      const b = new Uint32Array([100000, 200000, 999999]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("BigInt64Array", () => {
      const a = new BigInt64Array([1n, 2n, 3n]);
      const b = new BigInt64Array([1n, 2n, 99n]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });

    test("BigUint64Array", () => {
      const a = new BigUint64Array([1n, 2n, 3n]);
      const b = new BigUint64Array([1n, 2n, 99n]);
      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
    });
  });

  describe("float arrays throw TypeError", () => {
    // Float16Array is rejected by the implementation just like the wider float
    // kinds, so it is covered here as well.
    test("Float16Array", () => {
      const a = new Float16Array([1.0, 2.0]);
      const b = new Float16Array([1.0, 2.0]);
      expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
    });

    test("Float32Array", () => {
      const a = new Float32Array([1.0, 2.0]);
      const b = new Float32Array([1.0, 2.0]);
      expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
    });

    test("Float64Array", () => {
      const a = new Float64Array([1.0, 2.0]);
      const b = new Float64Array([1.0, 2.0]);
      expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
    });
  });

  test("mismatched types throw TypeError", () => {
    const a = new Uint8Array([1, 2, 3]);
    const b = new Uint16Array([1, 2, 3]);
    expect(() => Bun.indexOfFirstDifference(a as any, b as any)).toThrow(TypeError);
  });

  test("different lengths compares up to min length - identical prefix", () => {
    const a = new Uint8Array([1, 2, 3, 4, 5]);
    const b = new Uint8Array([1, 2, 3]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(3);
  });

  test("different lengths compares up to min length - difference found", () => {
    const a = new Uint8Array([1, 2, 3, 4, 5]);
    const b = new Uint8Array([1, 99, 3]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
  });

  test("large arrays (10000 elements) correctness", () => {
    const size = 10000;
    const a = new Uint8Array(size);
    const b = new Uint8Array(size);
    for (let i = 0; i < size; i++) {
      a[i] = i & 0xff;
      b[i] = i & 0xff;
    }
    // Identical
    expect(Bun.indexOfFirstDifference(a, b)).toBe(size);
    // Difference near end
    b[size - 1] = (b[size - 1]! + 1) & 0xff;
    expect(Bun.indexOfFirstDifference(a, b)).toBe(size - 1);
    // Restore and differ at start
    b[size - 1] = a[size - 1]!;
    b[0] = (b[0]! + 1) & 0xff;
    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
  });

  test("large Int32Array correctness", () => {
    const size = 5000;
    const a = new Int32Array(size);
    const b = new Int32Array(size);
    for (let i = 0; i < size; i++) {
      a[i] = i * 7;
      b[i] = i * 7;
    }
    expect(Bun.indexOfFirstDifference(a, b)).toBe(size);
    b[4999] = -1;
    expect(Bun.indexOfFirstDifference(a, b)).toBe(4999);
  });

  // Exhaustive SIMD boundary tests: test every length from 0..256
  // and every diff position to catch SIMD lane/tail edge cases
  describe("exhaustive SIMD boundary tests", () => {
    test("Uint8Array: every length 0..256, identical", () => {
      for (let len = 0; len <= 256; len++) {
        const a = new Uint8Array(len);
        const b = new Uint8Array(len);
        for (let i = 0; i < len; i++) {
          a[i] = (i + 1) & 0xff;
          b[i] = (i + 1) & 0xff;
        }
        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
      }
    });

    // Starts at length 1: an empty array has no position to place a difference at.
    test("Uint8Array: every length 1..256, diff at every position", () => {
      for (let len = 1; len <= 256; len++) {
        for (let diffAt = 0; diffAt < len; diffAt++) {
          const a = new Uint8Array(len);
          const b = new Uint8Array(len);
          for (let i = 0; i < len; i++) {
            a[i] = (i + 1) & 0xff;
            b[i] = (i + 1) & 0xff;
          }
          // Flipping the top bit guarantees the byte changes
          b[diffAt] = (b[diffAt]! ^ 0x80) & 0xff;
          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
        }
      }
    });

    test("Uint16Array: every length 0..256, identical", () => {
      for (let len = 0; len <= 256; len++) {
        const a = new Uint16Array(len);
        const b = new Uint16Array(len);
        for (let i = 0; i < len; i++) {
          a[i] = (i + 1) * 257;
          b[i] = (i + 1) * 257;
        }
        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
      }
    });

    test("Uint16Array: every length 1..128, diff at every position", () => {
      for (let len = 1; len <= 128; len++) {
        for (let diffAt = 0; diffAt < len; diffAt++) {
          const a = new Uint16Array(len);
          const b = new Uint16Array(len);
          for (let i = 0; i < len; i++) {
            a[i] = (i + 1) * 257;
            b[i] = (i + 1) * 257;
          }
          b[diffAt] = b[diffAt]! ^ 0x8000;
          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
        }
      }
    });

    test("Int32Array: every length 0..256, identical", () => {
      for (let len = 0; len <= 256; len++) {
        const a = new Int32Array(len);
        const b = new Int32Array(len);
        for (let i = 0; i < len; i++) {
          a[i] = (i + 1) * 100003;
          b[i] = (i + 1) * 100003;
        }
        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
      }
    });

    test("Int32Array: every length 1..128, diff at every position", () => {
      for (let len = 1; len <= 128; len++) {
        for (let diffAt = 0; diffAt < len; diffAt++) {
          const a = new Int32Array(len);
          const b = new Int32Array(len);
          for (let i = 0; i < len; i++) {
            a[i] = (i + 1) * 100003;
            b[i] = (i + 1) * 100003;
          }
          b[diffAt] = ~b[diffAt]!;
          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
        }
      }
    });
  });

  test("subarray views work correctly", () => {
    const buf = new Uint8Array([0, 0, 1, 2, 3, 4, 5, 0, 0]);
    const a = buf.subarray(2, 7); // [1, 2, 3, 4, 5]
    const b = new Uint8Array([1, 2, 99, 4, 5]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
  });

  test("subarray views identical", () => {
    const buf = new Uint8Array([0, 0, 1, 2, 3, 0, 0]);
    const a = buf.subarray(2, 5); // [1, 2, 3]
    const b = new Uint8Array([1, 2, 3]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(3);
  });

  test("detached buffers throw TypeError", () => {
    const buf = new ArrayBuffer(8);
    const a = new Uint8Array(buf);
    const b = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]);
    // Detach the buffer by transferring it
    const transferred = buf.transfer();
    void transferred;
    expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
    // The detached view must be rejected in either argument position
    expect(() => Bun.indexOfFirstDifference(b, a)).toThrow(TypeError);
  });

  test("too few arguments throws TypeError", () => {
    expect(() => (Bun.indexOfFirstDifference as any)()).toThrow(TypeError);
    expect(() => (Bun.indexOfFirstDifference as any)(new Uint8Array([1]))).toThrow(TypeError);
  });

  test("non-TypedArray arguments throw TypeError", () => {
    expect(() => (Bun.indexOfFirstDifference as any)("hello", "world")).toThrow(TypeError);
    expect(() => (Bun.indexOfFirstDifference as any)(123, 456)).toThrow(TypeError);
    expect(() => (Bun.indexOfFirstDifference as any)(new Uint8Array([1]), "world")).toThrow(TypeError);
  });

  test("Uint16Array element-level index", () => {
    // Each element is 2 bytes. Difference at element index 1.
    const a = new Uint16Array([0xaaaa, 0xbbbb, 0xcccc]);
    const b = new Uint16Array([0xaaaa, 0xffff, 0xcccc]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
  });

  test("Int32Array element-level index", () => {
    // Each element is 4 bytes. Difference at element index 2.
    const a = new Int32Array([1, 2, 3, 4]);
    const b = new Int32Array([1, 2, 99, 4]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
  });

  test("Uint32Array difference only in 3rd byte of element", () => {
    // On little-endian, 0x00000000 vs 0x00FF0000 differ at byte offset 2 within the element.
    // The function should still return element index 0 (not byte index 2).
    const a = new Uint32Array([0x00000000, 0x12345678]);
    const b = new Uint32Array([0x00ff0000, 0x12345678]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
    // Same but difference in element 1's 3rd byte
    const c = new Uint32Array([0x12345678, 0x00000000, 0xaabbccdd]);
    const d = new Uint32Array([0x12345678, 0x00ff0000, 0xaabbccdd]);
    expect(Bun.indexOfFirstDifference(c, d)).toBe(1);
  });

  test("Uint32Array difference only in last byte of element", () => {
    // 0x00000000 vs 0xFF000000 — differ only in the 4th byte (byte offset 3)
    const a = new Uint32Array([0x11111111, 0x00000000]);
    const b = new Uint32Array([0x11111111, 0xff000000]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
  });

  test("Uint32Array interior byte diff at every element and byte position", () => {
    // For each element position and each byte within the element,
    // ensure the correct element index is returned.
    for (let numElems = 1; numElems <= 64; numElems++) {
      for (let elemIdx = 0; elemIdx < numElems; elemIdx++) {
        for (let byteWithin = 0; byteWithin < 4; byteWithin++) {
          const a = new Uint32Array(numElems);
          const b = new Uint32Array(numElems);
          for (let i = 0; i < numElems; i++) {
            a[i] = 0x01010101;
            b[i] = 0x01010101;
          }
          // Flip just one byte within the element
          b[elemIdx] = b[elemIdx]! ^ (0xff << (byteWithin * 8));
          expect(Bun.indexOfFirstDifference(a, b)).toBe(elemIdx);
        }
      }
    }
  });

  test("BigUint64Array element-level index", () => {
    // Each element is 8 bytes. Difference at element index 1.
    const a = new BigUint64Array([0xffffffffffffffffn, 0x1234567890abcdefn, 0n]);
    const b = new BigUint64Array([0xffffffffffffffffn, 0n, 0n]);
    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
  });
});