From 8ed853c8d1f57e1dc7b933c0ec07b5f3d9b2e44f Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Mon, 26 Jan 2026 20:19:09 -0800 Subject: [PATCH] feat: add `Bun.indexOfFirstDifference()` for SIMD-accelerated TypedArray comparison Adds a new `Bun.indexOfFirstDifference(a, b)` function that returns the index of the first element where `a[i] !== b[i]`, comparing up to `min(a.length, b.length)` elements. Returns the minimum length if the compared prefix is identical. - Uses Google Highway SIMD library for fast byte-level comparison - Converts byte offsets back to element indices for multi-byte TypedArrays - Supports all integer TypedArray types (Uint8, Int8, Uint8Clamped, Uint16, Int16, Uint32, Int32, BigInt64, BigUint64) - Rejects float arrays (Float16/32/64) since NaN !== NaN and NaN has multiple bit representations - Validates same-type requirement and handles detached buffers --- packages/bun-types/bun.d.ts | 11 + src/bun.js/bindings/BunObject.cpp | 70 ++++ src/bun.js/bindings/highway_strings.cpp | 32 ++ .../bun/util/indexOfFirstDifference.test.ts | 339 ++++++++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 test/js/bun/util/indexOfFirstDifference.test.ts diff --git a/packages/bun-types/bun.d.ts b/packages/bun-types/bun.d.ts index 6098ea4db1..a40ce8663c 100644 --- a/packages/bun-types/bun.d.ts +++ b/packages/bun-types/bun.d.ts @@ -7092,6 +7092,17 @@ declare module "bun" { */ function indexOfLine(buffer: ArrayBufferView | ArrayBufferLike, offset?: number): number; + /** + * Returns the index of the first element where `a[i] !== b[i]`, + * comparing up to `min(a.length, b.length)` elements. + * Returns that minimum length if the compared prefix is identical. + * Uses SIMD acceleration for fast comparison. + * + * Both arrays must be the same TypedArray type, otherwise a TypeError is thrown. + * Float typed arrays (Float16Array, Float32Array, Float64Array) are not supported. 
+   */
+  function indexOfFirstDifference(a: NodeJS.TypedArray, b: NodeJS.TypedArray): number;
+
   interface GlobScanOptions {
     /**
      * The root directory to start matching from. Defaults to `process.cwd()`
diff --git a/src/bun.js/bindings/BunObject.cpp b/src/bun.js/bindings/BunObject.cpp
index 60c91fa0f0..a6f90d80da 100644
--- a/src/bun.js/bindings/BunObject.cpp
+++ b/src/bun.js/bindings/BunObject.cpp
@@ -755,6 +755,75 @@ JSC_DEFINE_HOST_FUNCTION(functionBunDeepMatch, (JSGlobalObject * globalObject, J
     return JSValue::encode(jsBoolean(match));
 }
 
+extern "C" size_t highway_index_of_first_difference(const uint8_t*, const uint8_t*, size_t);
+// Bun.indexOfFirstDifference(a, b): element index of the first mismatch between two same-typed integer TypedArrays.
+JSC_DECLARE_HOST_FUNCTION(functionBunIndexOfFirstDifference);
+JSC_DEFINE_HOST_FUNCTION(functionBunIndexOfFirstDifference, (JSGlobalObject * globalObject, JSC::CallFrame* callFrame))
+{
+    auto& vm = JSC::getVM(globalObject);
+    auto throwScope = DECLARE_THROW_SCOPE(vm);
+
+    if (callFrame->argumentCount() < 2) {
+        throwTypeError(globalObject, throwScope, "indexOfFirstDifference requires 2 arguments"_s);
+        return {};
+    }
+
+    auto arg0 = callFrame->uncheckedArgument(0);
+    auto arg1 = callFrame->uncheckedArgument(1);
+
+    if (!arg0.isCell() || !isTypedArrayType(arg0.asCell()->type())) {
+        throwTypeError(globalObject, throwScope, "First argument must be a TypedArray"_s);
+        return {};
+    }
+
+    auto arrayType = arg0.asCell()->type();
+
+    if (!arg1.isCell()) {
+        throwTypeError(globalObject, throwScope, "Second argument must be a TypedArray"_s);
+        return {};
+    }
+
+    auto arrayType1 = arg1.asCell()->type();
+
+    if (!isTypedArrayType(arrayType1)) {
+        throwTypeError(globalObject, throwScope, "Second argument must be a TypedArray"_s);
+        return {};
+    }
+
+    // Mixed element types are rejected: the byte-to-element conversion below assumes one shared element size.
+    if (arrayType != arrayType1) {
+        throwTypeError(globalObject, throwScope, "Both arguments must be the same TypedArray type"_s);
+        return {};
+    }
+
+    // Reject float arrays — NaN has multiple bit patterns and NaN !== NaN,
+    // so byte-level comparison would give wrong results
+    if (arrayType == JSC::Float32ArrayType || arrayType == JSC::Float64ArrayType || arrayType == JSC::Float16ArrayType) {
+        throwTypeError(globalObject, throwScope, "Float typed arrays are not supported"_s);
+        return {};
+    }
+    // Both cells passed isTypedArrayType() above; JSArrayBufferView provides isDetached() and span().
+    auto* viewA = jsCast<JSC::JSArrayBufferView*>(arg0.asCell());
+    auto* viewB = jsCast<JSC::JSArrayBufferView*>(arg1.asCell());
+
+    if (viewA->isDetached() || viewB->isDetached()) {
+        throwTypeError(globalObject, throwScope, "Cannot compare detached ArrayBuffers"_s);
+        return {};
+    }
+    // Compare only the common prefix, measured in bytes because the SIMD helper works on raw bytes.
+    auto spanA = viewA->span();
+    auto spanB = viewB->span();
+    size_t minByteLen = std::min(spanA.size(), spanB.size());
+
+    size_t firstDiffByte = highway_index_of_first_difference(spanA.data(), spanB.data(), minByteLen);
+
+    // Convert byte offset to element index; integer division maps any byte inside an element to that element
+    unsigned elemSize = JSC::elementSize(JSC::typedArrayType(arrayType));
+    size_t elementIndex = firstDiffByte / elemSize;
+
+    return JSValue::encode(jsNumber(static_cast<double>(elementIndex)));
+}
+
 JSC_DEFINE_HOST_FUNCTION(functionBunNanoseconds, (JSGlobalObject * globalObject, JSC::CallFrame* callFrame))
 {
     uint64_t time = Bun__readOriginTimer(bunVM(globalObject));
@@ -950,6 +1019,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj
     gunzipSync                  BunObject_callback_gunzipSync        DontDelete|Function 1
     gzipSync                    BunObject_callback_gzipSync          DontDelete|Function 1
     hash                        BunObject_lazyPropCb_wrap_hash       DontDelete|PropertyCallback
+    indexOfFirstDifference      functionBunIndexOfFirstDifference    DontDelete|Function 2
     indexOfLine                 BunObject_callback_indexOfLine       DontDelete|Function 1
     inflateSync                 BunObject_callback_inflateSync       DontDelete|Function 1
     inspect                     BunObject_lazyPropCb_wrap_inspect    DontDelete|PropertyCallback
diff --git a/src/bun.js/bindings/highway_strings.cpp b/src/bun.js/bindings/highway_strings.cpp
index 4eb625a9bd..eeee075f99 100644
--- a/src/bun.js/bindings/highway_strings.cpp
+++ b/src/bun.js/bindings/highway_strings.cpp
@@ -632,6 +632,32 @@ void FillWithSkipMaskImpl(const uint8_t* HWY_RESTRICT mask, size_t mask_len, uin
     }
 }
 
+// Byte offset of the first position where a and b differ, or byte_len when the ranges are equal.
+size_t IndexOfFirstDifferenceImpl(
+    const uint8_t* HWY_RESTRICT a,
+    const uint8_t* HWY_RESTRICT b,
+    size_t byte_len)
+{
+    D8 tag;
+    const size_t lanes = hn::Lanes(tag);
+    const size_t vector_end = byte_len - (byte_len % lanes);
+    size_t offset = 0;
+
+    // Whole vectors: Ne flags the differing lanes; a non-negative FindFirstTrue result is the first one.
+    while (offset < vector_end) {
+        const auto mismatch = hn::Ne(hn::LoadU(tag, a + offset), hn::LoadU(tag, b + offset));
+        const intptr_t lane = hn::FindFirstTrue(tag, mismatch);
+        if (lane >= 0) return offset + lane;
+        offset += lanes;
+    }
+
+    // Leftover bytes (fewer than one vector) are compared one at a time.
+    while (offset < byte_len && a[offset] == b[offset]) {
+        ++offset;
+    }
+    return offset;
+}
+
 } // namespace HWY_NAMESPACE
 } // namespace bun
 HWY_AFTER_NAMESPACE();
@@ -655,6 +681,7 @@ HWY_EXPORT(IndexOfNeedsEscapeForJavaScriptStringImplQuote);
 HWY_EXPORT(IndexOfNewlineOrNonASCIIImpl);
 HWY_EXPORT(IndexOfNewlineOrNonASCIIOrHashOrAtImpl);
 HWY_EXPORT(IndexOfSpaceOrNewlineOrNonASCIIImpl);
+HWY_EXPORT(IndexOfFirstDifferenceImpl);
 HWY_EXPORT(MemMemImpl);
 HWY_EXPORT(ScanCharFrequencyImpl);
 // Define the C-callable wrappers that use HWY_DYNAMIC_DISPATCH.
@@ -767,6 +794,11 @@ void highway_fill_with_skip_mask(
     HWY_DYNAMIC_DISPATCH(FillWithSkipMaskImpl)(mask, mask_len, output, input, length, skip_mask);
 }
 
+size_t highway_index_of_first_difference(const uint8_t* a, const uint8_t* b, size_t byte_len)
+{
+    return HWY_DYNAMIC_DISPATCH(IndexOfFirstDifferenceImpl)(a, b, byte_len);
+}
+
 } // extern "C"
 
 } // namespace bun
diff --git a/test/js/bun/util/indexOfFirstDifference.test.ts b/test/js/bun/util/indexOfFirstDifference.test.ts
new file mode 100644
index 0000000000..766584d2e2
--- /dev/null
+++ b/test/js/bun/util/indexOfFirstDifference.test.ts
@@ -0,0 +1,339 @@
+import { describe, expect, test } from "bun:test";
+
+describe("Bun.indexOfFirstDifference", () => {
+  test("identical arrays return length", () => {
+    const a = new Uint8Array([1, 2, 3, 4, 5]);
+    const b = new Uint8Array([1, 2, 3, 4, 5]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(5);
+  });
+
+  test("difference at start returns 0", () => {
+    const a = new Uint8Array([1, 2, 3]);
+    const b = new Uint8Array([9, 2, 3]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
+  });
+
+  test("difference at end returns length - 1", () => {
+    const a = new Uint8Array([1, 2, 3]);
+    const b = new Uint8Array([1, 2, 9]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+  });
+
+  test("difference in middle returns correct index", () => {
+    const a = new Uint8Array([1, 2, 3, 4, 5]);
+    const b = new Uint8Array([1, 2, 99, 4, 5]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+  });
+
+  test("empty arrays return 0", () => {
+    const a = new Uint8Array([]);
+    const b = new Uint8Array([]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
+  });
+
+  describe("multiple TypedArray types", () => {
+    test("Uint8Array", () => {
+      const a = new Uint8Array([10, 20, 30]);
+      const b = new Uint8Array([10, 20, 99]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Uint8ClampedArray", () => {
+      const a = new Uint8ClampedArray([10, 20, 30]);
+      const b = new Uint8ClampedArray([10, 20, 99]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Int8Array", () => {
+      const a = new Int8Array([10, -20, 30]);
+      const b = new Int8Array([10, -20, 99]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Uint16Array", () => {
+      const a = new Uint16Array([1000, 2000, 3000]);
+      const b = new Uint16Array([1000, 2000, 9999]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Int16Array", () => {
+      const a = new Int16Array([1000, -2000, 3000]);
+      const b = new Int16Array([1000, -2000, 9999]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Int32Array", () => {
+      const a = new Int32Array([100000, 200000, 300000]);
+      const b = new Int32Array([100000, 200000, 999999]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("Uint32Array", () => {
+      const a = new Uint32Array([100000, 200000, 300000]);
+      const b = new Uint32Array([100000, 200000, 999999]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("BigInt64Array", () => {
+      const a = new BigInt64Array([1n, 2n, 3n]);
+      const b = new BigInt64Array([1n, 2n, 99n]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+
+    test("BigUint64Array", () => {
+      const a = new BigUint64Array([1n, 2n, 3n]);
+      const b = new BigUint64Array([1n, 2n, 99n]);
+      expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+    });
+  });
+
+  describe("float arrays throw TypeError", () => {
+    test("Float32Array", () => {
+      const a = new Float32Array([1.0, 2.0]);
+      const b = new Float32Array([1.0, 2.0]);
+      expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
+    });
+
+    test("Float64Array", () => {
+      const a = new Float64Array([1.0, 2.0]);
+      const b = new Float64Array([1.0, 2.0]);
+      expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
+    });
+  });
+
+  test("mismatched types throw TypeError", () => {
+    const a = new Uint8Array([1, 2, 3]);
+    const b = new Uint16Array([1, 2, 3]);
+    expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
+  });
+
+  test("different lengths compares up to min length - identical prefix", () => {
+    const a = new Uint8Array([1, 2, 3, 4, 5]);
+    const b = new Uint8Array([1, 2, 3]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(3);
+  });
+
+  test("different lengths compares up to min length - difference found", () => {
+    const a = new Uint8Array([1, 2, 3, 4, 5]);
+    const b = new Uint8Array([1, 99, 3]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
+  });
+
+  test("large arrays (10000 elements) correctness", () => {
+    const size = 10000;
+    const a = new Uint8Array(size);
+    const b = new Uint8Array(size);
+    for (let i = 0; i < size; i++) {
+      a[i] = i & 0xff;
+      b[i] = i & 0xff;
+    }
+    // Identical
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(size);
+
+    // Difference near end
+    b[size - 1] = (b[size - 1]! + 1) & 0xff;
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(size - 1);
+
+    // Restore and differ at start
+    b[size - 1] = a[size - 1]!;
+    b[0] = (b[0]! + 1) & 0xff;
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
+  });
+
+  test("large Int32Array correctness", () => {
+    const size = 5000;
+    const a = new Int32Array(size);
+    const b = new Int32Array(size);
+    for (let i = 0; i < size; i++) {
+      a[i] = i * 7;
+      b[i] = i * 7;
+    }
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(size);
+
+    b[4999] = -1;
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(4999);
+  });
+
+  // Exhaustive SIMD boundary tests: test every length from 0..256
+  // and every diff position to catch SIMD lane/tail edge cases
+  describe("exhaustive SIMD boundary tests", () => {
+    test("Uint8Array: every length 0..256, identical", () => {
+      for (let len = 0; len <= 256; len++) {
+        const a = new Uint8Array(len);
+        const b = new Uint8Array(len);
+        for (let i = 0; i < len; i++) {
+          a[i] = (i + 1) & 0xff;
+          b[i] = (i + 1) & 0xff;
+        }
+        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
+      }
+    });
+
+    test("Uint8Array: every length 1..256, diff at every position", () => {
+      for (let len = 1; len <= 256; len++) {
+        for (let diffAt = 0; diffAt < len; diffAt++) {
+          const a = new Uint8Array(len);
+          const b = new Uint8Array(len);
+          for (let i = 0; i < len; i++) {
+            a[i] = (i + 1) & 0xff;
+            b[i] = (i + 1) & 0xff;
+          }
+          b[diffAt] = (b[diffAt]! ^ 0x80) & 0xff;
+          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
+        }
+      }
+    });
+
+    test("Uint16Array: every length 0..256, identical", () => {
+      for (let len = 0; len <= 256; len++) {
+        const a = new Uint16Array(len);
+        const b = new Uint16Array(len);
+        for (let i = 0; i < len; i++) {
+          a[i] = (i + 1) * 257;
+          b[i] = (i + 1) * 257;
+        }
+        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
+      }
+    });
+
+    test("Uint16Array: every length 1..128, diff at every position", () => {
+      for (let len = 1; len <= 128; len++) {
+        for (let diffAt = 0; diffAt < len; diffAt++) {
+          const a = new Uint16Array(len);
+          const b = new Uint16Array(len);
+          for (let i = 0; i < len; i++) {
+            a[i] = (i + 1) * 257;
+            b[i] = (i + 1) * 257;
+          }
+          b[diffAt] = b[diffAt]! ^ 0x8000;
+          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
+        }
+      }
+    });
+
+    test("Int32Array: every length 0..256, identical", () => {
+      for (let len = 0; len <= 256; len++) {
+        const a = new Int32Array(len);
+        const b = new Int32Array(len);
+        for (let i = 0; i < len; i++) {
+          a[i] = (i + 1) * 100003;
+          b[i] = (i + 1) * 100003;
+        }
+        expect(Bun.indexOfFirstDifference(a, b)).toBe(len);
+      }
+    });
+
+    test("Int32Array: every length 1..128, diff at every position", () => {
+      for (let len = 1; len <= 128; len++) {
+        for (let diffAt = 0; diffAt < len; diffAt++) {
+          const a = new Int32Array(len);
+          const b = new Int32Array(len);
+          for (let i = 0; i < len; i++) {
+            a[i] = (i + 1) * 100003;
+            b[i] = (i + 1) * 100003;
+          }
+          b[diffAt] = ~b[diffAt]!;
+          expect(Bun.indexOfFirstDifference(a, b)).toBe(diffAt);
+        }
+      }
+    });
+  });
+
+  test("subarray views work correctly", () => {
+    const buf = new Uint8Array([0, 0, 1, 2, 3, 4, 5, 0, 0]);
+    const a = buf.subarray(2, 7); // [1, 2, 3, 4, 5]
+    const b = new Uint8Array([1, 2, 99, 4, 5]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+  });
+
+  test("subarray views identical", () => {
+    const buf = new Uint8Array([0, 0, 1, 2, 3, 0, 0]);
+    const a = buf.subarray(2, 5); // [1, 2, 3]
+    const b = new Uint8Array([1, 2, 3]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(3);
+  });
+
+  test("detached buffers throw TypeError", () => {
+    const buf = new ArrayBuffer(8);
+    const a = new Uint8Array(buf);
+    const b = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]);
+
+    // Detach the buffer by transferring it
+    const transferred = buf.transfer();
+    void transferred;
+
+    expect(() => Bun.indexOfFirstDifference(a, b)).toThrow(TypeError);
+  });
+
+  test("too few arguments throws TypeError", () => {
+    expect(() => (Bun.indexOfFirstDifference as any)()).toThrow(TypeError);
+    expect(() => (Bun.indexOfFirstDifference as any)(new Uint8Array([1]))).toThrow(TypeError);
+  });
+
+  test("non-TypedArray arguments throw TypeError", () => {
+    expect(() => (Bun.indexOfFirstDifference as any)("hello", "world")).toThrow(TypeError);
+    expect(() => (Bun.indexOfFirstDifference as any)(123, 456)).toThrow(TypeError);
+    expect(() => (Bun.indexOfFirstDifference as any)(new Uint8Array([1]), "world")).toThrow(TypeError);
+  });
+
+  test("Uint16Array element-level index", () => {
+    // Each element is 2 bytes. Difference at element index 1.
+    const a = new Uint16Array([0xaaaa, 0xbbbb, 0xcccc]);
+    const b = new Uint16Array([0xaaaa, 0xffff, 0xcccc]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
+  });
+
+  test("Int32Array element-level index", () => {
+    // Each element is 4 bytes. Difference at element index 2.
+    const a = new Int32Array([1, 2, 3, 4]);
+    const b = new Int32Array([1, 2, 99, 4]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(2);
+  });
+
+  test("Uint32Array difference only in 3rd byte of element", () => {
+    // On little-endian, 0x00000000 vs 0x00FF0000 differ at byte offset 2 within the element.
+    // The function should still return element index 0 (not byte index 2).
+    const a = new Uint32Array([0x00000000, 0x12345678]);
+    const b = new Uint32Array([0x00ff0000, 0x12345678]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(0);
+
+    // Same but difference in element 1's 3rd byte
+    const c = new Uint32Array([0x12345678, 0x00000000, 0xaabbccdd]);
+    const d = new Uint32Array([0x12345678, 0x00ff0000, 0xaabbccdd]);
+    expect(Bun.indexOfFirstDifference(c, d)).toBe(1);
+  });
+
+  test("Uint32Array difference only in last byte of element", () => {
+    // 0x00000000 vs 0xFF000000 — differ only in the 4th byte (byte offset 3)
+    const a = new Uint32Array([0x11111111, 0x00000000]);
+    const b = new Uint32Array([0x11111111, 0xff000000]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
+  });
+
+  test("Uint32Array interior byte diff at every element and byte position", () => {
+    // For each element position and each byte within the element,
+    // ensure the correct element index is returned.
+    for (let numElems = 1; numElems <= 64; numElems++) {
+      for (let elemIdx = 0; elemIdx < numElems; elemIdx++) {
+        for (let byteWithin = 0; byteWithin < 4; byteWithin++) {
+          const a = new Uint32Array(numElems);
+          const b = new Uint32Array(numElems);
+          for (let i = 0; i < numElems; i++) {
+            a[i] = 0x01010101;
+            b[i] = 0x01010101;
+          }
+          // Flip just one byte within the element
+          b[elemIdx] = b[elemIdx]! ^ (0xff << (byteWithin * 8));
+          expect(Bun.indexOfFirstDifference(a, b)).toBe(elemIdx);
+        }
+      }
+    }
+  });
+
+  test("BigUint64Array element-level index", () => {
+    // Each element is 8 bytes. Difference at element index 1.
+    const a = new BigUint64Array([0xffffffffffffffffn, 0x1234567890abcdefn, 0n]);
+    const b = new BigUint64Array([0xffffffffffffffffn, 0n, 0n]);
+    expect(Bun.indexOfFirstDifference(a, b)).toBe(1);
+  });
+});