Compare commits

..

7 Commits

Author SHA1 Message Date
Jarred Sumner
5268a5e546 nitpicks 2026-02-21 14:21:58 -08:00
Dylan Conway
52c68b8d4c 2026-02-21 14:08:51 -08:00
autofix-ci[bot]
2726bf88dd [autofix.ci] apply automated fixes 2026-02-21 14:08:50 -08:00
Claude
c10a786b2d fix(test): use module-scope import for chmodSync instead of dynamic require
Replace dynamic `require("fs")` inside test functions with a module-scope
`import { chmodSync } from "node:fs"` per test/CLAUDE.md guidelines.

https://claude.ai/code/session_01UujMs6n1JkfK5Sr18JM6du
2026-02-21 14:08:50 -08:00
Dylan Conway
6df1908619 fix(compile): guard BlobHeader behind OS(DARWIN) || __linux__
MSVC limits alignment to 8192 bytes. BlobHeader has 16KB alignment
which is only needed on macOS and Linux (for the section-based
standalone approach). Windows uses a different PE section strategy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 14:08:50 -08:00
autofix-ci[bot]
a4024af929 [autofix.ci] apply automated fixes 2026-02-21 14:08:50 -08:00
Dylan Conway
6f03fc5451 fix(compile): use ELF section for standalone binaries on Linux
Standalone executables on Linux previously read their embedded module
graph from /proc/self/exe at startup, which fails when the binary has
execute-only permissions (chmod 111). This mirrors the approach already
used on macOS (__BUN,__bun section) and Windows (.bun PE section).

Build time: places a BUN_COMPILED symbol in a .bun ELF section via
__attribute__((section(".bun"))). At bun build --compile time, appends
the module graph to the end of the file, converts PT_GNU_STACK into a
PT_LOAD segment to map it, and stores the new vaddr at the original
BUN_COMPILED location.

Runtime: BUN_COMPILED.size holds either 0 (not standalone) or the vaddr
of the appended data. The kernel maps it via PT_LOAD during execve, so
the runtime just dereferences a pointer — zero file I/O, no read
permission needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 14:08:49 -08:00
14 changed files with 566 additions and 718 deletions

View File

@@ -593,35 +593,8 @@ function getTargetTriplet(platform) {
*/
function needsBaselineVerification(platform) {
const { os, arch, baseline } = platform;
if (os === "linux") return (arch === "x64" && baseline) || arch === "aarch64";
if (os === "windows") return arch === "x64" && baseline;
return false;
}
/**
* Returns the emulator binary name for the given platform.
* Linux uses QEMU user-mode; Windows uses Intel SDE.
* @param {Platform} platform
* @returns {string}
*/
function getEmulatorBinary(platform) {
const { os, arch } = platform;
if (os === "windows") return "sde-external/sde.exe";
if (arch === "aarch64") return "qemu-aarch64-static";
return "qemu-x86_64-static";
}
const SDE_VERSION = "9.58.0-2025-06-16";
const SDE_URL = `https://downloadmirror.intel.com/859732/sde-external-${SDE_VERSION}-win.tar.xz`;
/**
* @param {Platform} platform
* @param {PipelineOptions} options
* @returns {Step}
*/
function hasWebKitChanges(options) {
const { changedFiles = [] } = options;
return changedFiles.some(file => file.includes("SetupWebKit.cmake"));
if (os !== "linux") return false;
return (arch === "x64" && baseline) || arch === "aarch64";
}
/**
@@ -630,31 +603,9 @@ function hasWebKitChanges(options) {
* @returns {Step}
*/
function getVerifyBaselineStep(platform, options) {
const { os } = platform;
const { arch } = platform;
const targetKey = getTargetKey(platform);
const triplet = getTargetTriplet(platform);
const emulator = getEmulatorBinary(platform);
const jitStressFlag = hasWebKitChanges(options) ? " --jit-stress" : "";
const setupCommands =
os === "windows"
? [
`echo Downloading build artifacts...`,
`buildkite-agent artifact download ${triplet}.zip . --step ${targetKey}-build-bun`,
`echo Extracting ${triplet}.zip...`,
`tar -xf ${triplet}.zip`,
`echo Downloading Intel SDE...`,
`curl.exe -fsSL -o sde.tar.xz "${SDE_URL}"`,
`echo Extracting Intel SDE...`,
`7z x -y sde.tar.xz`,
`7z x -y sde.tar`,
`ren sde-external-${SDE_VERSION}-win sde-external`,
]
: [
`buildkite-agent artifact download '*.zip' . --step ${targetKey}-build-bun`,
`unzip -o '${triplet}.zip'`,
`chmod +x ${triplet}/bun`,
];
const archArg = arch === "x64" ? "x64" : "aarch64";
return {
key: `${targetKey}-verify-baseline`,
@@ -663,10 +614,57 @@ function getVerifyBaselineStep(platform, options) {
agents: getLinkBunAgent(platform, options),
retry: getRetry(),
cancel_on_build_failing: isMergeQueue(),
timeout_in_minutes: hasWebKitChanges(options) ? 30 : 10,
timeout_in_minutes: 5,
command: [
...setupCommands,
`bun scripts/verify-baseline.ts --binary ${triplet}/${os === "windows" ? "bun.exe" : "bun"} --emulator ${emulator}${jitStressFlag}`,
`buildkite-agent artifact download '*.zip' . --step ${targetKey}-build-bun`,
`unzip -o '${getTargetTriplet(platform)}.zip'`,
`unzip -o '${getTargetTriplet(platform)}-profile.zip'`,
`chmod +x ${getTargetTriplet(platform)}/bun ${getTargetTriplet(platform)}-profile/bun-profile`,
`./scripts/verify-baseline-cpu.sh --arch ${archArg} --binary ${getTargetTriplet(platform)}/bun`,
`./scripts/verify-baseline-cpu.sh --arch ${archArg} --binary ${getTargetTriplet(platform)}-profile/bun-profile`,
],
};
}
/**
* Returns true if the PR modifies SetupWebKit.cmake (WebKit version changes).
* JIT stress tests under QEMU should run when WebKit is updated to catch
* JIT-generated code that uses unsupported CPU instructions.
* @param {PipelineOptions} options
* @returns {boolean}
*/
function hasWebKitChanges(options) {
  // A WebKit bump is signalled by any touched path containing SetupWebKit.cmake.
  const changedFiles = options.changedFiles ?? [];
  for (const path of changedFiles) {
    if (path.includes("SetupWebKit.cmake")) {
      return true;
    }
  }
  return false;
}
/**
* Returns a step that runs JSC JIT stress tests under QEMU.
* This verifies that JIT-compiled code doesn't use CPU instructions
* beyond the baseline target (no AVX on x64, no LSE on aarch64).
* @param {Platform} platform
* @param {PipelineOptions} options
* @returns {Step}
*/
function getJitStressTestStep(platform, options) {
  const { arch } = platform;
  const targetKey = getTargetKey(platform);
  // Hoisted: previously getTargetTriplet(platform) was re-evaluated for each
  // command line; compute it once, matching the style of sibling step builders.
  const triplet = getTargetTriplet(platform);
  const archArg = arch === "x64" ? "x64" : "aarch64";
  return {
    key: `${targetKey}-jit-stress-qemu`,
    label: `${getTargetLabel(platform)} - jit-stress-qemu`,
    depends_on: [`${targetKey}-build-bun`],
    agents: getLinkBunAgent(platform, options),
    retry: getRetry(),
    cancel_on_build_failing: isMergeQueue(),
    // JIT stress tests are slow under QEMU emulation
    timeout_in_minutes: 30,
    command: [
      `buildkite-agent artifact download '*.zip' . --step ${targetKey}-build-bun`,
      `unzip -o '${triplet}.zip'`,
      `chmod +x ${triplet}/bun`,
      `./scripts/verify-jit-stress-qemu.sh --arch ${archArg} --binary ${triplet}/bun`,
    ],
  };
}
@@ -1266,6 +1264,10 @@ async function getPipeline(options = {}) {
if (needsBaselineVerification(target)) {
steps.push(getVerifyBaselineStep(target, options));
// Run JIT stress tests under QEMU when WebKit is updated
if (hasWebKitChanges(options)) {
steps.push(getJitStressTestStep(target, options));
}
}
return getStepWithDependsOn(

View File

@@ -13,11 +13,6 @@ else()
set(LSHPACK_INCLUDES .)
endif()
# Suppress all warnings from vendored lshpack on Windows (clang-cl)
if(WIN32)
set(LSHPACK_CMAKE_ARGS "-DCMAKE_C_FLAGS=-w")
endif()
register_cmake_command(
TARGET
lshpack
@@ -33,7 +28,6 @@ register_cmake_command(
# _lshpack_enc_get_static_name in libls-hpack.a(lshpack.c.o)
# _update_hash in libls-hpack.a(lshpack.c.o)
-DCMAKE_BUILD_TYPE=Release
${LSHPACK_CMAKE_ARGS}
INCLUDES
${LSHPACK_INCLUDES}
)

View File

@@ -79,22 +79,12 @@ endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64|AARCH64" AND NOT APPLE)
list(APPEND MIMALLOC_CMAKE_ARGS -DMI_NO_OPT_ARCH=ON)
list(APPEND MIMALLOC_CMAKE_ARGS -DMI_OPT_SIMD=ON)
if(NOT WIN32)
list(APPEND MIMALLOC_CMAKE_ARGS "-DCMAKE_C_FLAGS=-moutline-atomics")
endif()
list(APPEND MIMALLOC_CMAKE_ARGS "-DCMAKE_C_FLAGS=-moutline-atomics")
elseif(NOT ENABLE_BASELINE)
list(APPEND MIMALLOC_CMAKE_ARGS -DMI_OPT_ARCH=ON)
list(APPEND MIMALLOC_CMAKE_ARGS -DMI_OPT_SIMD=ON)
endif()
# Suppress all warnings from mimalloc on Windows — it's vendored C code compiled
# as C++ (MI_USE_CXX=ON) which triggers many clang-cl warnings (-Wold-style-cast,
# -Wzero-as-null-pointer-constant, -Wc++98-compat-pedantic, etc.)
if(WIN32)
list(APPEND MIMALLOC_CMAKE_ARGS "-DCMAKE_C_FLAGS=-w")
list(APPEND MIMALLOC_CMAKE_ARGS "-DCMAKE_CXX_FLAGS=-w")
endif()
if(WIN32)
if(DEBUG)
set(MIMALLOC_LIBRARY mimalloc-static-debug)

View File

@@ -7,16 +7,9 @@ register_repository(
12882eee073cfe5c7621bcfadf679e1372d4537b
)
# Suppress all warnings from vendored tinycc on Windows (clang-cl)
if(WIN32)
set(TINYCC_CMAKE_ARGS "-DCMAKE_C_FLAGS=-w")
endif()
register_cmake_command(
TARGET
tinycc
ARGS
${TINYCC_CMAKE_ARGS}
LIBRARIES
tcc
)

View File

@@ -1,233 +0,0 @@
// Verify that a Bun binary doesn't use CPU instructions beyond its baseline target.
//
// Detects the platform and chooses the appropriate emulator:
// Linux x64: QEMU with Nehalem CPU (no AVX)
// Linux arm64: QEMU with Cortex-A53 (no LSE/SVE)
// Windows x64: Intel SDE with -nhm (no AVX)
//
// Usage:
// bun scripts/verify-baseline.ts --binary ./bun --emulator /usr/bin/qemu-x86_64
// bun scripts/verify-baseline.ts --binary ./bun.exe --emulator ./sde.exe
import { readdirSync } from "node:fs";
import { basename, dirname, join, resolve } from "node:path";
// Module-scope ES import instead of `require("node:util")`: this file is an
// ES module and the rest of its imports are static; mixing in CommonJS
// `require` was inconsistent (and `require` is not defined in ESM without a
// createRequire shim).
import { parseArgs } from "node:util";

// Parse CLI flags. `strict: true` rejects unknown flags with an error.
const { values } = parseArgs({
  args: process.argv.slice(2),
  options: {
    binary: { type: "string" },
    emulator: { type: "string" },
    "jit-stress": { type: "boolean", default: false },
  },
  strict: true,
});
const binary = resolve(values.binary!);
function resolveEmulator(name: string): string {
  // Prefer whatever binary of that name is already on PATH.
  const onPath = Bun.which(name);
  if (onPath) return onPath;
  // Try without -static suffix (e.g. qemu-aarch64 instead of qemu-aarch64-static)
  const staticSuffix = "-static";
  if (name.endsWith(staticSuffix)) {
    const stripped = Bun.which(name.slice(0, name.length - staticSuffix.length));
    if (stripped) return stripped;
  }
  // Last resort: resolve as a relative path (e.g. sde-external/sde.exe)
  return resolve(name);
}
const emulatorPath = resolveEmulator(values.emulator!);
const scriptDir = dirname(import.meta.path);
const repoRoot = resolve(scriptDir, "..");
const fixturesDir = join(repoRoot, "test", "js", "bun", "jsc-stress", "fixtures");
const wasmFixturesDir = join(fixturesDir, "wasm");
const preloadPath = join(repoRoot, "test", "js", "bun", "jsc-stress", "preload.js");
// Platform detection
const isWindows = process.platform === "win32";
const isAarch64 = process.arch === "arm64";
// SDE outputs this when a chip-check violation occurs
const SDE_VIOLATION_PATTERN = /SDE-ERROR:.*not valid for specified chip/i;
// Configure emulator based on platform
const config = isWindows
? {
runnerCmd: [emulatorPath, "-nhm", "--"],
cpuDesc: "Nehalem (SSE4.2, no AVX/AVX2/AVX512)",
// SDE must run from its own directory for Pin DLL resolution
cwd: dirname(emulatorPath),
}
: isAarch64
? {
runnerCmd: [emulatorPath, "-cpu", "cortex-a53"],
cpuDesc: "Cortex-A53 (ARMv8.0-A+CRC, no LSE/SVE)",
cwd: undefined,
}
: {
runnerCmd: [emulatorPath, "-cpu", "Nehalem"],
cpuDesc: "Nehalem (SSE4.2, no AVX/AVX2/AVX512)",
cwd: undefined,
};
function isInstructionViolation(exitCode: number, output: string): boolean {
  // Windows (SDE) reports violations via a diagnostic message rather than a
  // signal; POSIX QEMU delivers SIGILL, reported as exit code 128 + 4 = 132.
  return isWindows ? SDE_VIOLATION_PATTERN.test(output) : exitCode === 132;
}
console.log(`--- Verifying ${basename(binary)} on ${config.cpuDesc}`);
console.log(` Binary: ${binary}`);
console.log(` Emulator: ${config.runnerCmd.join(" ")}`);
console.log();
let instructionFailures = 0;
let otherFailures = 0;
let passed = 0;
const failedTests: string[] = [];
interface RunTestOptions {
cwd?: string;
/** Tee output live to the console while still capturing it for analysis */
live?: boolean;
}
/** Read a stream, write each chunk to a writable, and return the full text. */
/** Read a stream, write each chunk to a writable, and return the full text. */
async function teeStream(stream: ReadableStream<Uint8Array>, output: NodeJS.WriteStream): Promise<string> {
  const collected: Uint8Array[] = [];
  for await (const piece of stream) {
    output.write(piece);
    collected.push(piece);
  }
  // Concatenate before decoding so multi-byte characters split across chunk
  // boundaries decode correctly.
  return Buffer.concat(collected).toString();
}
/**
 * Run one test under the platform emulator and classify the outcome:
 * pass, CPU-instruction violation, or unrelated failure.
 * Mutates the module-level counters (passed / instructionFailures /
 * otherFailures / failedTests). Returns true only on success.
 */
async function runTest(label: string, binaryArgs: string[], options?: RunTestOptions): Promise<boolean> {
  console.log(`+++ ${label}`);
  const start = performance.now();
  const live = options?.live ?? false;
  const proc = Bun.spawn([...config.runnerCmd, binary, ...binaryArgs], {
    // config.cwd takes priority — SDE on Windows must run from its own directory for Pin DLL resolution
    cwd: config.cwd ?? options?.cwd,
    stdout: "pipe",
    stderr: "pipe",
  });
  let stdout: string;
  let stderr: string;
  if (live) {
    // Tee both streams to the console as they arrive while still capturing
    // them. Promise.all also awaits proc.exited; its value is discarded by
    // the two-element destructure.
    [stdout, stderr] = await Promise.all([
      teeStream(proc.stdout as ReadableStream<Uint8Array>, process.stdout),
      teeStream(proc.stderr as ReadableStream<Uint8Array>, process.stderr),
      proc.exited,
    ]);
  } else {
    [stdout, stderr] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ]);
  }
  // proc.exited has resolved above, so exitCode is non-null here.
  const exitCode = proc.exitCode!;
  const elapsed = ((performance.now() - start) / 1000).toFixed(1);
  const output = stdout + "\n" + stderr;
  if (exitCode === 0) {
    if (!live && stdout.trim()) console.log(stdout.trim());
    console.log(` PASS (${elapsed}s)`);
    passed++;
    return true;
  }
  if (isInstructionViolation(exitCode, output)) {
    if (!live && output.trim()) console.log(output.trim());
    console.log();
    console.log(` FAIL: CPU instruction violation detected (${elapsed}s)`);
    if (isAarch64) {
      console.log(" The aarch64 build targets Cortex-A53 (ARMv8.0-A+CRC).");
      console.log(" LSE atomics, SVE, and dotprod instructions are not allowed.");
    } else {
      console.log(" The baseline x64 build targets Nehalem (SSE4.2).");
      console.log(" AVX, AVX2, and AVX512 instructions are not allowed.");
    }
    instructionFailures++;
    failedTests.push(label);
  } else {
    // Non-zero exit that isn't a SIGILL/SDE chip-check violation: record as a
    // warning rather than a baseline failure.
    if (!live && output.trim()) console.log(output.trim());
    console.log(` WARN: exit code ${exitCode} (${elapsed}s, not a CPU instruction issue)`);
    otherFailures++;
  }
  return false;
}
// Phase 1: SIMD code path verification (always runs)
const simdTestPath = join(repoRoot, "test", "js", "bun", "jsc-stress", "fixtures", "simd-baseline.test.ts");
await runTest("SIMD baseline tests", ["test", simdTestPath], { live: true });
// Phase 2: JIT stress fixtures (only with --jit-stress, e.g. on WebKit changes)
if (values["jit-stress"]) {
const jsFixtures = readdirSync(fixturesDir)
.filter(f => f.endsWith(".js"))
.sort();
console.log();
console.log(`--- JS fixtures (DFG/FTL) — ${jsFixtures.length} tests`);
for (let i = 0; i < jsFixtures.length; i++) {
const fixture = jsFixtures[i];
await runTest(`[${i + 1}/${jsFixtures.length}] ${fixture}`, ["--preload", preloadPath, join(fixturesDir, fixture)]);
}
const wasmFixtures = readdirSync(wasmFixturesDir)
.filter(f => f.endsWith(".js"))
.sort();
console.log();
console.log(`--- Wasm fixtures (BBQ/OMG) — ${wasmFixtures.length} tests`);
for (let i = 0; i < wasmFixtures.length; i++) {
const fixture = wasmFixtures[i];
await runTest(
`[${i + 1}/${wasmFixtures.length}] ${fixture}`,
["--preload", preloadPath, join(wasmFixturesDir, fixture)],
{ cwd: wasmFixturesDir },
);
}
} else {
console.log();
console.log("--- Skipping JIT stress fixtures (pass --jit-stress to enable)");
}
// Summary: print counters, annotate the Buildkite build on instruction
// failures, and set the process exit status.
console.log();
console.log("--- Summary");
console.log(` Passed: ${passed}`);
console.log(` Instruction failures: ${instructionFailures}`);
console.log(` Other failures: ${otherFailures} (warnings, not CPU instruction issues)`);
console.log();
if (instructionFailures > 0) {
  console.error(" FAILED: Code uses unsupported CPU instructions.");
  // Report to Buildkite annotations tab
  const platform = isWindows ? "Windows x64" : isAarch64 ? "Linux aarch64" : "Linux x64";
  const annotation = [
    `<details>`,
    // Fix: platform and failure count were concatenated with no separator,
    // rendering e.g. "Linux x643 failed". Insert ": " between them.
    `<summary>CPU instruction violation on ${platform}: ${instructionFailures} failed</summary>`,
    `<p>The baseline build uses instructions not available on <code>${config.cpuDesc}</code>.</p>`,
    `<ul>${failedTests.map(t => `<li><code>${t}</code></li>`).join("")}</ul>`,
    `</details>`,
  ].join("\n");
  Bun.spawnSync(["buildkite-agent", "annotate", "--append", "--style", "error", "--context", "verify-baseline"], {
    stdin: new Blob([annotation]),
  });
  process.exit(1);
}
if (otherFailures > 0) {
  console.log(" Some tests failed for reasons unrelated to CPU instructions.");
}
console.log(` All baseline verification passed on ${config.cpuDesc}.`);

View File

@@ -154,6 +154,22 @@ pub const StandaloneModuleGraph = struct {
}
};
const ELF = struct {
    /// Address of the BUN_COMPILED blob header that c-bindings.cpp places in
    /// the `.bun` ELF section. Its u64 value is 0 for a plain `bun` binary,
    /// or the virtual address of the appended module graph for a standalone
    /// executable (patched in at `bun build --compile` time).
    pub extern "C" fn Bun__getStandaloneModuleGraphELFVaddr() ?*align(1) u64;

    /// Returns the embedded standalone-module-graph bytes, or null when this
    /// binary is not a compiled standalone executable.
    pub fn getData() ?[]const u8 {
        const vaddr = (Bun__getStandaloneModuleGraphELFVaddr() orelse return null).*;
        if (vaddr == 0) return null;
        // BUN_COMPILED.size holds the virtual address of the appended data.
        // The kernel mapped it via PT_LOAD, so we can dereference directly.
        // Format at target: [u64 payload_len][payload bytes]
        const target: [*]const u8 = @ptrFromInt(vaddr);
        const payload_len = std.mem.readInt(u64, target[0..8], .little);
        // NOTE(review): payloads shorter than 8 bytes are rejected —
        // presumably a corruption sanity check; confirm the intended minimum.
        if (payload_len < 8) return null;
        return target[8..][0..payload_len];
    }
};
pub const File = struct {
name: []const u8 = "",
loader: bun.options.Loader,
@@ -885,6 +901,56 @@ pub const StandaloneModuleGraph = struct {
}
return cloned_executable_fd;
},
.linux => {
// ELF section approach: find .bun section and expand it
const input_result = bun.sys.File.readToEnd(.{ .handle = cloned_executable_fd }, bun.default_allocator);
if (input_result.err) |err| {
Output.prettyErrorln("Error reading executable: {f}", .{err});
cleanup(zname, cloned_executable_fd);
return bun.invalid_fd;
}
const elf_file = bun.elf.ElfFile.init(bun.default_allocator, input_result.bytes.items) catch |err| {
Output.prettyErrorln("Error initializing ELF file: {}", .{err});
cleanup(zname, cloned_executable_fd);
return bun.invalid_fd;
};
defer elf_file.deinit();
elf_file.writeBunSection(bytes) catch |err| {
Output.prettyErrorln("Error writing .bun section to ELF: {}", .{err});
cleanup(zname, cloned_executable_fd);
return bun.invalid_fd;
};
input_result.bytes.deinit();
switch (Syscall.setFileOffset(cloned_executable_fd, 0)) {
.err => |err| {
Output.prettyErrorln("Error seeking to start of temporary file: {f}", .{err});
cleanup(zname, cloned_executable_fd);
return bun.invalid_fd;
},
else => {},
}
// Write the modified ELF data back to the file
const write_file = bun.sys.File{ .handle = cloned_executable_fd };
switch (write_file.writeAll(elf_file.data.items)) {
.err => |err| {
Output.prettyErrorln("Error writing ELF file: {f}", .{err});
cleanup(zname, cloned_executable_fd);
return bun.invalid_fd;
},
.result => {},
}
// Truncate the file to the exact size of the modified ELF
_ = Syscall.ftruncate(cloned_executable_fd, @intCast(elf_file.data.items.len));
if (comptime !Environment.isWindows) {
_ = bun.c.fchmod(cloned_executable_fd.native(), 0o777);
}
return cloned_executable_fd;
},
else => {
var total_byte_count: usize = undefined;
if (Environment.isWindows) {
@@ -1261,99 +1327,23 @@ pub const StandaloneModuleGraph = struct {
return try fromBytesAlloc(allocator, @constCast(pe_bytes), offsets);
}
// Do not invoke libuv here.
const self_exe = openSelf() catch return null;
defer self_exe.close();
var trailer_bytes: [4096]u8 = undefined;
std.posix.lseek_END(self_exe.cast(), -4096) catch return null;
var read_amount: usize = 0;
while (read_amount < trailer_bytes.len) {
switch (Syscall.read(self_exe, trailer_bytes[read_amount..])) {
.result => |read| {
if (read == 0) return null;
read_amount += read;
},
.err => {
return null;
},
if (comptime Environment.isLinux) {
const elf_bytes = ELF.getData() orelse return null;
if (elf_bytes.len < @sizeOf(Offsets) + trailer.len) {
Output.debugWarn("bun standalone module graph is too small to be valid", .{});
return null;
}
}
if (read_amount < trailer.len + @sizeOf(usize) + @sizeOf(Offsets))
// definitely missing data
return null;
var end = @as([]u8, &trailer_bytes).ptr + read_amount - @sizeOf(usize);
const total_byte_count: usize = @as(usize, @bitCast(end[0..8].*));
if (total_byte_count > std.math.maxInt(u32) or total_byte_count < 4096) {
// sanity check: the total byte count should never be more than 4 GB
// bun is at least like 30 MB so if it reports a size less than 4096 bytes then something is wrong
return null;
}
end -= trailer.len;
if (!bun.strings.hasPrefixComptime(end[0..trailer.len], trailer)) {
// invalid trailer
return null;
}
end -= @sizeOf(Offsets);
const offsets: Offsets = std.mem.bytesAsValue(Offsets, end[0..@sizeOf(Offsets)]).*;
if (offsets.byte_count >= total_byte_count) {
// if we hit this branch then the file is corrupted and we should just give up
return null;
}
var to_read = try bun.default_allocator.alloc(u8, offsets.byte_count);
var to_read_from = to_read;
// Reading the data and making sure it's page-aligned + won't crash due
// to out of bounds using mmap() is very complicated.
// we just read the whole thing into memory for now.
// at the very least
// if you have not a ton of code, we only do a single read() call
if (Environment.allow_assert or offsets.byte_count > 1024 * 3) {
const offset_from_end = trailer_bytes.len - (@intFromPtr(end) - @intFromPtr(@as([]u8, &trailer_bytes).ptr));
std.posix.lseek_END(self_exe.cast(), -@as(i64, @intCast(offset_from_end + offsets.byte_count))) catch return null;
if (comptime Environment.allow_assert) {
// actually we just want to verify this logic is correct in development
if (offsets.byte_count <= 1024 * 3) {
to_read_from = try bun.default_allocator.alloc(u8, offsets.byte_count);
}
}
var remain = to_read_from;
while (remain.len > 0) {
switch (Syscall.read(self_exe, remain)) {
.result => |read| {
if (read == 0) return null;
remain = remain[read..];
},
.err => {
bun.default_allocator.free(to_read);
return null;
},
}
const elf_bytes_slice = elf_bytes[elf_bytes.len - @sizeOf(Offsets) - trailer.len ..];
const trailer_bytes = elf_bytes[elf_bytes.len - trailer.len ..][0..trailer.len];
if (!bun.strings.eqlComptime(trailer_bytes, trailer)) {
Output.debugWarn("bun standalone module graph has invalid trailer", .{});
return null;
}
const offsets = std.mem.bytesAsValue(Offsets, elf_bytes_slice).*;
return try fromBytesAlloc(allocator, @constCast(elf_bytes), offsets);
}
if (offsets.byte_count <= 1024 * 3) {
// we already have the bytes
end -= offsets.byte_count;
@memcpy(to_read[0..offsets.byte_count], end[0..offsets.byte_count]);
if (comptime Environment.allow_assert) {
bun.assert(bun.strings.eqlLong(to_read, end[0..offsets.byte_count], true));
}
}
return try fromBytesAlloc(allocator, to_read, offsets);
comptime unreachable;
}
/// Allocates a StandaloneModuleGraph on the heap, populates it from bytes, sets it globally, and returns the pointer.
@@ -1364,107 +1354,6 @@ pub const StandaloneModuleGraph = struct {
return graph_ptr;
}
/// heuristic: `bun build --compile` won't be supported if the name is "bun", "bunx", or "node".
/// this is a cheap way to avoid the extra overhead of opening the executable, and also just makes sense.
fn isBuiltInExe(comptime T: type, argv0: []const T) bool {
    if (argv0.len == 0) return false;
    // Cheap length gates first so the comptime string comparisons below only
    // run on plausible candidates.
    if (argv0.len == 3) {
        if (bun.strings.eqlComptimeCheckLenWithType(T, argv0, bun.strings.literal(T, "bun"), false)) {
            return true;
        }
    }
    if (argv0.len == 4) {
        if (bun.strings.eqlComptimeCheckLenWithType(T, argv0, bun.strings.literal(T, "bunx"), false)) {
            return true;
        }
        if (bun.strings.eqlComptimeCheckLenWithType(T, argv0, bun.strings.literal(T, "node"), false)) {
            return true;
        }
    }
    // Debug builds also ship as bun-debug / bun-debugx. NOTE(review): the
    // trailing bool differs from the calls above (true vs false) — presumably
    // a length-check flag in bun.strings; confirm before changing.
    if (comptime Environment.isDebug) {
        if (bun.strings.eqlComptimeCheckLenWithType(T, argv0, bun.strings.literal(T, "bun-debug"), true)) {
            return true;
        }
        if (bun.strings.eqlComptimeCheckLenWithType(T, argv0, bun.strings.literal(T, "bun-debugx"), true)) {
            return true;
        }
    }
    return false;
}
/// Open a file descriptor to our own executable so the embedded standalone
/// module graph can be read from it. Returns error.FileNotFound when argv[0]
/// names a stock bun/bunx/node binary (cheap heuristic, see isBuiltInExe) or
/// when the executable cannot be located.
fn openSelf() std.fs.OpenSelfExeError!bun.FileDescriptor {
    if (!Environment.isWindows) {
        const argv = bun.argv;
        if (argv.len > 0) {
            // Skip opening entirely for names that are never standalone builds.
            if (isBuiltInExe(u8, argv[0])) {
                return error.FileNotFound;
            }
        }
    }
    switch (Environment.os) {
        .linux => {
            if (std.fs.openFileAbsoluteZ("/proc/self/exe", .{})) |easymode| {
                return .fromStdFile(easymode);
            } else |_| {
                if (bun.argv.len > 0) {
                    // The user doesn't have /proc/ mounted, so now we just guess and hope for the best.
                    var whichbuf: bun.PathBuffer = undefined;
                    if (bun.which(
                        &whichbuf,
                        bun.env_var.PATH.get() orelse return error.FileNotFound,
                        "",
                        bun.argv[0],
                    )) |path| {
                        return .fromStdFile(try std.fs.cwd().openFileZ(path, .{}));
                    }
                }
                return error.FileNotFound;
            }
        },
        .mac => {
            // Use of MAX_PATH_BYTES here is valid as the resulting path is immediately
            // opened with no modification.
            const self_exe_path = try bun.selfExePath();
            const file = try std.fs.openFileAbsoluteZ(self_exe_path.ptr, .{});
            return .fromStdFile(file);
        },
        .windows => {
            // The PEB already carries the image path; no extra syscall needed.
            const image_path_unicode_string = std.os.windows.peb().ProcessParameters.ImagePathName;
            const image_path = image_path_unicode_string.Buffer.?[0 .. image_path_unicode_string.Length / 2];
            var nt_path_buf: bun.WPathBuffer = undefined;
            const nt_path = bun.strings.addNTPathPrefixIfNeeded(&nt_path_buf, image_path);
            // Apply the built-in-name heuristic to the basename, minus ".exe".
            const basename_start = std.mem.lastIndexOfScalar(u16, nt_path, '\\') orelse
                return error.FileNotFound;
            const basename = nt_path[basename_start + 1 .. nt_path.len - ".exe".len];
            if (isBuiltInExe(u16, basename)) {
                return error.FileNotFound;
            }
            return bun.sys.openFileAtWindows(
                .cwd(),
                nt_path,
                .{
                    .access_mask = w.SYNCHRONIZE | w.GENERIC_READ,
                    .disposition = w.FILE_OPEN,
                    .options = w.FILE_SYNCHRONOUS_IO_NONALERT | w.FILE_OPEN_REPARSE_POINT,
                },
            ).unwrap() catch {
                return error.FileNotFound;
            };
        },
        .wasm => @compileError("TODO"),
    }
}
/// Source map serialization in the bundler is specially designed to be
/// loaded in memory as is. Source contents are compressed with ZSTD to
/// reduce the file size, and mappings are stored as uncompressed VLQ.

View File

@@ -910,6 +910,10 @@ extern "C" void Bun__signpost_emit(os_log_t log, os_signpost_type_t type, os_sig
#undef EMIT_SIGNPOST
#undef FOR_EACH_TRACE_EVENT
#endif // OS(DARWIN) signpost code
#if OS(DARWIN) || defined(__linux__)
#define BLOB_HEADER_ALIGNMENT 16 * 1024
extern "C" {
@@ -919,6 +923,8 @@ struct BlobHeader {
} __attribute__((aligned(BLOB_HEADER_ALIGNMENT)));
}
#if OS(DARWIN)
extern "C" BlobHeader __attribute__((section("__BUN,__bun"))) BUN_COMPILED = { 0, 0 };
extern "C" uint64_t* Bun__getStandaloneModuleGraphMachoLength()
@@ -926,6 +932,17 @@ extern "C" uint64_t* Bun__getStandaloneModuleGraphMachoLength()
return &BUN_COMPILED.size;
}
#else // __linux__
extern "C" BlobHeader __attribute__((section(".bun"), aligned(BLOB_HEADER_ALIGNMENT), used)) BUN_COMPILED = { 0 };
extern "C" uint64_t* Bun__getStandaloneModuleGraphELFVaddr()
{
return &BUN_COMPILED.size;
}
#endif // OS(DARWIN) / __linux__
#elif defined(_WIN32)
// Windows PE section handling
#include <windows.h>

View File

@@ -31,17 +31,8 @@
#include <utility>
#include <vector>
#ifdef _WIN32
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmicrosoft-include"
#endif
#define v8 real_v8
#define private public
#include "node/v8.h"
#undef private
#undef v8
#ifdef _WIN32
#pragma clang diagnostic pop
#endif

View File

@@ -3704,6 +3704,7 @@ pub fn freeSensitive(allocator: std.mem.Allocator, slice: anytype) void {
pub const macho = @import("./macho.zig");
pub const pe = @import("./pe.zig");
pub const elf = @import("./elf.zig");
pub const valkey = @import("./valkey/index.zig");
pub const highway = @import("./highway.zig");

254
src/elf.zig Normal file
View File

@@ -0,0 +1,254 @@
/// ELF file manipulation for `bun build --compile` on Linux.
///
/// Analogous to `macho.zig` (macOS) and `pe.zig` (Windows).
/// Finds the `.bun` ELF section (placed by a linker symbol in c-bindings.cpp)
/// and expands it to hold the standalone module graph data.
///
/// Must work on any host platform (macOS, Windows, Linux) for cross-compilation.
pub const ElfFile = struct {
data: std.array_list.Managed(u8),
allocator: Allocator,
/// Validate `elf_data` as a little-endian 64-bit ELF image and wrap a
/// heap-allocated, growable copy of it in an ElfFile.
pub fn init(allocator: Allocator, elf_data: []const u8) !*ElfFile {
    // The header must fit before any field can be read.
    if (elf_data.len < @sizeOf(Elf64_Ehdr)) return error.InvalidElfFile;

    const header = readEhdr(elf_data);

    // Reject anything that is not a little-endian 64-bit ELF image.
    if (!bun.strings.eqlComptime(header.e_ident[0..4], "\x7fELF")) return error.InvalidElfFile;
    if (header.e_ident[elf.EI_CLASS] != elf.ELFCLASS64) return error.Not64Bit;
    // bun only supports x64 + arm64, both little-endian.
    if (header.e_ident[elf.EI_DATA] != elf.ELFDATA2LSB) return error.NotLittleEndian;

    // Keep a private, growable copy of the image bytes.
    var bytes = try std.array_list.Managed(u8).initCapacity(allocator, elf_data.len);
    errdefer bytes.deinit();
    try bytes.appendSlice(elf_data);

    const file = try allocator.create(ElfFile);
    errdefer allocator.destroy(file);
    file.* = .{ .data = bytes, .allocator = allocator };
    return file;
}
/// Free the image buffer, then the ElfFile allocation itself.
/// Order matters: `data` lives inside `self`, so it must be released
/// before `self` is destroyed.
pub fn deinit(self: *ElfFile) void {
    self.data.deinit();
    self.allocator.destroy(self);
}
/// Find the `.bun` section and write `payload` to the end of the ELF file,
/// creating a new PT_LOAD segment (from PT_GNU_STACK) to map it. Stores the
/// new segment's vaddr at the original BUN_COMPILED location so the runtime
/// can dereference it directly.
///
/// We always append rather than writing in-place because .bun is in the middle
/// of a PT_LOAD segment — sections like .dynamic, .got, .got.plt come after it,
/// and expanding in-place would invalidate their absolute virtual addresses.
pub fn writeBunSection(self: *ElfFile, payload: []const u8) !void {
    const ehdr = readEhdr(self.data.items);
    const bun_section = try self.findBunSection(ehdr);
    const bun_section_offset = bun_section.file_offset;
    const page_size = pageSize(ehdr);
    // Appended layout: [u64 LE payload length][payload], padded to a page.
    const header_size: u64 = @sizeOf(u64);
    const new_content_size: u64 = header_size + payload.len;
    const aligned_new_size = alignUp(new_content_size, page_size);
    // Find the highest virtual address across all PT_LOAD segments
    var max_vaddr_end: u64 = 0;
    const phdr_size = @sizeOf(Elf64_Phdr);
    for (0..ehdr.e_phnum) |i| {
        const phdr_offset = @as(usize, @intCast(ehdr.e_phoff)) + i * phdr_size;
        const phdr = std.mem.bytesAsValue(Elf64_Phdr, self.data.items[phdr_offset..][0..phdr_size]).*;
        if (phdr.p_type == elf.PT_LOAD) {
            const vaddr_end = phdr.p_vaddr + phdr.p_memsz;
            if (vaddr_end > max_vaddr_end) {
                max_vaddr_end = vaddr_end;
            }
        }
    }
    // The new segment's virtual address: after all existing mappings, page-aligned
    const new_vaddr = alignUp(max_vaddr_end, page_size);
    // The new data goes at the end of the file, page-aligned
    const new_file_offset = alignUp(self.data.items.len, page_size);
    // Grow the buffer to hold the new data + section header table after it
    const shdr_table_size = @as(u64, ehdr.e_shnum) * @sizeOf(Elf64_Shdr);
    const new_shdr_offset = new_file_offset + aligned_new_size;
    const total_new_size = new_shdr_offset + shdr_table_size;
    const old_file_size = self.data.items.len;
    try self.data.ensureTotalCapacity(total_new_size);
    self.data.items.len = total_new_size;
    // Zero the gap between old file end and new data (alignment padding).
    // Without this, uninitialized allocator memory would leak into the output.
    if (new_file_offset > old_file_size) {
        @memset(self.data.items[old_file_size..new_file_offset], 0);
    }
    // Copy the section header table to its new location.
    // memmove because source and destination both live in self.data.items.
    const old_shdr_offset = ehdr.e_shoff;
    bun.memmove(
        self.data.items[new_shdr_offset..][0..shdr_table_size],
        self.data.items[old_shdr_offset..][0..shdr_table_size],
    );
    // Update e_shoff to the new section header table location
    self.writeEhdrShoff(new_shdr_offset);
    // Write the payload at the new location: [u64 LE size][data][zero padding]
    std.mem.writeInt(u64, self.data.items[new_file_offset..][0..8], @intCast(payload.len), .little);
    @memcpy(self.data.items[new_file_offset + header_size ..][0..payload.len], payload);
    // Zero the padding between payload end and section header table
    const padding_start = new_file_offset + new_content_size;
    if (new_shdr_offset > padding_start) {
        @memset(self.data.items[padding_start..new_shdr_offset], 0);
    }
    // Write the vaddr of the appended data at the ORIGINAL .bun section location
    // (where BUN_COMPILED symbol points). At runtime, BUN_COMPILED.size will be
    // this vaddr (always non-zero), which the runtime dereferences as a pointer.
    // Non-standalone binaries have BUN_COMPILED.size = 0, so 0 means "no data".
    std.mem.writeInt(u64, self.data.items[bun_section_offset..][0..8], new_vaddr, .little);
    // Update the .bun section header to reflect the new data location and size
    // so that tools like `readelf -S` show accurate metadata.
    {
        const shdr_offset = new_shdr_offset + @as(u64, bun_section.section_index) * @sizeOf(Elf64_Shdr);
        const shdr_bytes = self.data.items[shdr_offset..][0..@sizeOf(Elf64_Shdr)];
        var shdr = std.mem.bytesAsValue(Elf64_Shdr, shdr_bytes).*;
        shdr.sh_offset = new_file_offset;
        shdr.sh_size = new_content_size;
        shdr.sh_addr = new_vaddr;
        @memcpy(shdr_bytes, std.mem.asBytes(&shdr));
    }
    // Find PT_GNU_STACK and convert it to PT_LOAD for the new .bun data.
    // PT_GNU_STACK only controls stack executability; on modern kernels the
    // stack defaults to non-executable without it, so repurposing is safe.
    var found_gnu_stack = false;
    for (0..ehdr.e_phnum) |i| {
        const phdr_offset = @as(usize, @intCast(ehdr.e_phoff)) + i * phdr_size;
        const phdr = std.mem.bytesAsValue(Elf64_Phdr, self.data.items[phdr_offset..][0..phdr_size]).*;
        if (phdr.p_type == elf.PT_GNU_STACK) {
            // Convert to PT_LOAD
            const new_phdr: Elf64_Phdr = .{
                .p_type = elf.PT_LOAD,
                .p_flags = elf.PF_R, // read-only
                .p_offset = new_file_offset,
                .p_vaddr = new_vaddr,
                .p_paddr = new_vaddr,
                .p_filesz = aligned_new_size,
                .p_memsz = aligned_new_size,
                .p_align = page_size,
            };
            @memcpy(self.data.items[phdr_offset..][0..phdr_size], std.mem.asBytes(&new_phdr));
            found_gnu_stack = true;
            break;
        }
    }
    if (!found_gnu_stack) {
        return error.NoGnuStackSegment;
    }
}
/// Serialize the full in-memory ELF image to `writer`, byte for byte.
pub fn write(self: *const ElfFile, writer: anytype) !void {
    return writer.writeAll(self.data.items);
}
// --- Internal helpers ---
/// Location of the `.bun` section inside the ELF image, as returned by
/// `findBunSection`.
const BunSectionInfo = struct {
    /// File offset of the .bun section's data (sh_offset).
    file_offset: u64,
    /// Index of the .bun section in the section header table.
    section_index: u16,
};
/// Locate the `.bun` section by name and return its file offset and index
/// within the section header table.
///
/// Errors:
/// - `error.BunSectionNotFound` when there are no sections or none named ".bun".
/// - `error.InvalidElfFile` when a header table points past the end of the file.
fn findBunSection(self: *const ElfFile, ehdr: Elf64_Ehdr) !BunSectionInfo {
    const section_count = ehdr.e_shnum;
    if (section_count == 0) return error.BunSectionNotFound;
    const table_offset = ehdr.e_shoff;
    if (table_offset + @as(u64, section_count) * @sizeOf(Elf64_Shdr) > self.data.items.len)
        return error.InvalidElfFile;
    // Section names live in the .shstrtab string table, indexed by e_shstrndx.
    const names_shdr = self.readShdr(table_offset, ehdr.e_shstrndx);
    if (names_shdr.sh_offset + names_shdr.sh_size > self.data.items.len)
        return error.InvalidElfFile;
    const names = self.data.items[names_shdr.sh_offset..][0..names_shdr.sh_size];
    // Linear scan over the section header table for the ".bun" entry.
    var index: u16 = 0;
    while (index < section_count) : (index += 1) {
        const shdr = self.readShdr(table_offset, index);
        if (shdr.sh_name >= names.len) continue;
        const section_name = std.mem.sliceTo(names[shdr.sh_name..], 0);
        if (bun.strings.eqlComptime(section_name, ".bun")) {
            return .{
                .file_offset = shdr.sh_offset,
                .section_index = index,
            };
        }
    }
    return error.BunSectionNotFound;
}
/// Deserialize the section header at `index` from the table at `table_offset`.
fn readShdr(self: *const ElfFile, table_offset: u64, index: u16) Elf64_Shdr {
    const entry_offset = table_offset + @as(u64, index) * @sizeOf(Elf64_Shdr);
    const entry_bytes = self.data.items[entry_offset..][0..@sizeOf(Elf64_Shdr)];
    return std.mem.bytesAsValue(Elf64_Shdr, entry_bytes).*;
}
/// Patch e_shoff (the section header table offset) in the ELF header in place.
fn writeEhdrShoff(self: *ElfFile, new_shoff: u64) void {
    // Derive the field offset from the struct layout instead of hard-coding
    // the magic byte offset 40 — same value (16-byte e_ident + e_type u16 +
    // e_machine u16 + e_version u32 + e_entry u64 + e_phoff u64), but it
    // cannot silently drift from the Elf64_Ehdr definition.
    const field_offset = @offsetOf(Elf64_Ehdr, "e_shoff");
    std.mem.writeInt(u64, self.data.items[field_offset..][0..8], new_shoff, .little);
}
/// Maximum runtime page size for the target machine, used to align the
/// appended segment. AArch64 and PPC64 kernels may run with 64KB pages;
/// every other supported target uses 4KB.
fn pageSize(ehdr: Elf64_Ehdr) u64 {
    const machine = ehdr.e_machine;
    if (machine == .AARCH64 or machine == .PPC64) {
        return 0x10000; // 64KB
    }
    return 0x1000; // 4KB
}
};
/// Deserialize the ELF header from the first @sizeOf(Elf64_Ehdr) bytes of `data`.
fn readEhdr(data: []const u8) Elf64_Ehdr {
    const header_bytes = data[0..@sizeOf(Elf64_Ehdr)];
    return std.mem.bytesAsValue(Elf64_Ehdr, header_bytes).*;
}
/// Round `value` up to the next multiple of `alignment`.
/// `alignment == 0` means "no alignment" and returns `value` unchanged.
/// Handles any alignment: the original mask trick silently produced wrong
/// results for non-power-of-two alignments; that case now falls back to
/// modular arithmetic. Power-of-two callers (page sizes) are unchanged.
fn alignUp(value: u64, alignment: u64) u64 {
    if (alignment == 0) return value;
    // Fast path: power-of-two alignment (every current caller passes a page size).
    if (alignment & (alignment - 1) == 0) {
        const mask = alignment - 1;
        return (value + mask) & ~mask;
    }
    // General case for arbitrary alignments.
    const rem = value % alignment;
    return if (rem == 0) value else value + (alignment - rem);
}
const bun = @import("bun");
const std = @import("std");
const Allocator = std.mem.Allocator;
const elf = std.elf;
const Elf64_Ehdr = elf.Elf64_Ehdr;
const Elf64_Phdr = elf.Elf64_Phdr;
const Elf64_Shdr = elf.Elf64_Shdr;

View File

@@ -1,5 +1,6 @@
import { describe, expect, test } from "bun:test";
import { isArm64, isLinux, isMacOS, isMusl, isWindows, tempDir } from "harness";
import { chmodSync } from "node:fs";
import { join } from "path";
describe("Bun.build compile", () => {
@@ -188,4 +189,156 @@ describe("compiled binary validity", () => {
});
});
if (isLinux) {
describe("ELF section", () => {
test("compiled binary runs with execute-only permissions", async () => {
using dir = tempDir("build-compile-exec-only", {
"app.js": `console.log("exec-only-output");`,
});
const outfile = join(dir + "", "app-exec-only");
const result = await Bun.build({
entrypoints: [join(dir + "", "app.js")],
compile: {
outfile,
},
});
expect(result.success).toBe(true);
chmodSync(result.outputs[0].path, 0o111);
await using proc = Bun.spawn({
cmd: [result.outputs[0].path],
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout.trim()).toBe("exec-only-output");
expect(exitCode).toBe(0);
});
test("compiled binary with large payload runs correctly", async () => {
// Generate a string payload >16KB to exceed the initial .bun section allocation
// (BUN_COMPILED is aligned to 16KB). This forces the expansion path in elf.zig
// which appends data to the end of the file and converts PT_GNU_STACK to PT_LOAD.
const largeString = Buffer.alloc(20000, "x").toString();
using dir = tempDir("build-compile-large-payload", {
"app.js": `const data = "${largeString}"; console.log("large-payload-" + data.length);`,
});
const outfile = join(dir + "", "app-large");
const result = await Bun.build({
entrypoints: [join(dir + "", "app.js")],
compile: {
outfile,
},
});
expect(result.success).toBe(true);
await using proc = Bun.spawn({
cmd: [result.outputs[0].path],
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toContain("large-payload-20000");
expect(exitCode).toBe(0);
});
test("compiled binary with large payload runs with execute-only permissions", async () => {
// Same as above but also verifies execute-only works with the expansion path
const largeString = Buffer.alloc(20000, "y").toString();
using dir = tempDir("build-compile-large-exec-only", {
"app.js": `const data = "${largeString}"; console.log("large-exec-only-" + data.length);`,
});
const outfile = join(dir + "", "app-large-exec-only");
const result = await Bun.build({
entrypoints: [join(dir + "", "app.js")],
compile: {
outfile,
},
});
expect(result.success).toBe(true);
chmodSync(result.outputs[0].path, 0o111);
await using proc = Bun.spawn({
cmd: [result.outputs[0].path],
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toContain("large-exec-only-20000");
expect(exitCode).toBe(0);
});
test("compiled binary has .bun ELF section", async () => {
using dir = tempDir("build-compile-elf-section", {
"app.js": `console.log("elf-section-test");`,
});
const outfile = join(dir + "", "app-elf-section");
const result = await Bun.build({
entrypoints: [join(dir + "", "app.js")],
compile: {
outfile,
},
});
expect(result.success).toBe(true);
// Verify .bun ELF section exists by reading section headers
const file = Bun.file(result.outputs[0].path);
const bytes = new Uint8Array(await file.arrayBuffer());
// Parse ELF header to find section headers
const view = new DataView(bytes.buffer);
// e_shoff at offset 40 (little-endian u64)
const shoff = Number(view.getBigUint64(40, true));
// e_shentsize at offset 58
const shentsize = view.getUint16(58, true);
// e_shnum at offset 60
const shnum = view.getUint16(60, true);
// e_shstrndx at offset 62
const shstrndx = view.getUint16(62, true);
// Read .shstrtab section header to get string table
const strtabOff = shoff + shstrndx * shentsize;
const strtabFileOffset = Number(view.getBigUint64(strtabOff + 24, true));
const strtabSize = Number(view.getBigUint64(strtabOff + 32, true));
const decoder = new TextDecoder();
let foundBunSection = false;
for (let i = 0; i < shnum; i++) {
const hdrOff = shoff + i * shentsize;
const nameIdx = view.getUint32(hdrOff, true);
if (nameIdx < strtabSize) {
// Read null-terminated string from strtab
let end = strtabFileOffset + nameIdx;
while (end < bytes.length && bytes[end] !== 0) end++;
const name = decoder.decode(bytes.slice(strtabFileOffset + nameIdx, end));
if (name === ".bun") {
foundBunSection = true;
// Verify the section has non-zero size
const shSize = Number(view.getBigUint64(hdrOff + 32, true));
expect(shSize).toBeGreaterThan(0);
break;
}
}
}
expect(foundBunSection).toBe(true);
});
});
}
// NOTE(review): presumably refers to the `file`-command-based binary validity
// tests elsewhere in this suite — confirm the reference, then clarify or remove
// this stray comment.

View File

@@ -36,8 +36,8 @@
"std.enums.tagName(": 2,
"std.fs.Dir": 164,
"std.fs.File": 93,
"std.fs.cwd": 110,
"std.fs.openFileAbsolute": 10,
"std.fs.cwd": 109,
"std.fs.openFileAbsolute": 8,
"std.log": 1,
"std.mem.indexOfAny(u8": 0,
"std.unicode": 27,

View File

@@ -1,203 +0,0 @@
// Exercises Bun's SIMD code paths to verify the baseline binary doesn't
// emit instructions beyond its CPU target (no AVX on x64, no LSE/SVE on aarch64).
//
// Each test uses inputs large enough to hit vectorized fast paths (>= 16 bytes
// for @Vector(16, u8), >= 64 bytes for wider paths) and validates correctness
// to catch both SIGILL and miscompilation from wrong instruction lowering.
import { describe, expect, test } from "bun:test";
// Use Buffer.alloc instead of "x".repeat() — repeat is slow in debug JSC builds.
const ascii256 = Buffer.alloc(256, "a").toString();
const ascii1k = Buffer.alloc(1024, "x").toString();
describe("escapeHTML — @Vector(16, u8) gated by enableSIMD", () => {
test("clean passthrough", () => {
expect(Bun.escapeHTML(ascii256)).toBe(ascii256);
});
test("ampersand in middle", () => {
const input = ascii256 + "&" + ascii256;
const escaped = Bun.escapeHTML(input);
expect(escaped).toContain("&amp;");
// The raw "&" should have been replaced — only "&amp;" should remain
expect(escaped.replaceAll("&amp;", "").includes("&")).toBe(false);
});
test("all special chars", () => {
const input = '<div class="test">' + ascii256 + "</div>";
const escaped = Bun.escapeHTML(input);
expect(escaped).toContain("&lt;");
expect(escaped).toContain("&gt;");
expect(escaped).toContain("&quot;");
});
});
describe("stringWidth — @Vector(16, u8) ungated", () => {
test("ascii", () => {
expect(Bun.stringWidth(ascii256)).toBe(256);
});
test("empty", () => {
expect(Bun.stringWidth("")).toBe(0);
});
test("tabs", () => {
expect(Bun.stringWidth(Buffer.alloc(32, "\t").toString())).toBe(0);
});
test("mixed printable and zero-width", () => {
const mixed = "hello" + "\x00".repeat(64) + "world";
expect(Bun.stringWidth(mixed)).toBe(10);
});
});
describe("Buffer hex encoding — @Vector(16, u8) gated by enableSIMD", () => {
test.each([16, 32, 64, 128, 256])("size %d", size => {
const buf = Buffer.alloc(size, 0xab);
const hex = buf.toString("hex");
expect(hex.length).toBe(size * 2);
expect(hex).toBe("ab".repeat(size));
});
test("all byte values", () => {
const varied = Buffer.alloc(256);
for (let i = 0; i < 256; i++) varied[i] = i;
const hex = varied.toString("hex");
expect(hex).toStartWith("000102030405");
expect(hex).toEndWith("fdfeff");
});
});
describe("base64 — simdutf runtime dispatch", () => {
test("ascii roundtrip", () => {
const encoded = btoa(ascii1k);
expect(atob(encoded)).toBe(ascii1k);
});
test("binary roundtrip", () => {
const binary = String.fromCharCode(...Array.from({ length: 256 }, (_, i) => i));
expect(atob(btoa(binary))).toBe(binary);
});
});
describe("TextEncoder/TextDecoder — simdutf runtime dispatch", () => {
const encoder = new TextEncoder();
const decoder = new TextDecoder();
test("ascii roundtrip", () => {
const bytes = encoder.encode(ascii1k);
expect(bytes.length).toBe(1024);
expect(decoder.decode(bytes)).toBe(ascii1k);
});
test("mixed ascii + multibyte", () => {
const mixed = ascii256 + "\u00e9\u00e9\u00e9" + ascii256 + "\u2603\u2603" + ascii256;
expect(decoder.decode(encoder.encode(mixed))).toBe(mixed);
});
test("emoji surrogate pairs", () => {
const emoji = "\u{1F600}".repeat(64);
expect(decoder.decode(encoder.encode(emoji))).toBe(emoji);
});
});
describe("decodeURIComponent — SIMD % scanning", () => {
test("clean passthrough", () => {
const clean = Buffer.alloc(256, "a").toString();
expect(decodeURIComponent(clean)).toBe(clean);
});
test("encoded at various positions", () => {
const input = "a".repeat(128) + "%20" + "b".repeat(128) + "%21";
expect(decodeURIComponent(input)).toBe("a".repeat(128) + " " + "b".repeat(128) + "!");
});
test("heavy utf8 encoding", () => {
const input = Array.from({ length: 64 }, () => "%C3%A9").join("");
expect(decodeURIComponent(input)).toBe("\u00e9".repeat(64));
});
});
describe("URL parsing — Highway indexOfChar/indexOfAny", () => {
test("long URL with all components", () => {
const longPath = "/" + "segment/".repeat(32) + "end";
const url = new URL("https://user:pass@example.com:8080" + longPath + "?key=value&foo=bar#section");
expect(url.protocol).toBe("https:");
expect(url.hostname).toBe("example.com");
expect(url.port).toBe("8080");
expect(url.pathname).toBe(longPath);
expect(url.search).toBe("?key=value&foo=bar");
expect(url.hash).toBe("#section");
});
});
describe("JSON — JS lexer SIMD string scanning", () => {
test("large object roundtrip", () => {
const obj: Record<string, string> = {};
for (let i = 0; i < 100; i++) {
obj["key_" + Buffer.alloc(32, "a").toString() + "_" + i] = "value_" + Buffer.alloc(64, "b").toString() + "_" + i;
}
const parsed = JSON.parse(JSON.stringify(obj));
expect(Object.keys(parsed).length).toBe(100);
expect(parsed["key_" + Buffer.alloc(32, "a").toString() + "_0"]).toBe(
"value_" + Buffer.alloc(64, "b").toString() + "_0",
);
});
test("string with escape sequences", () => {
const original = { msg: 'quote"here\nand\ttab' + Buffer.alloc(256, "x").toString() };
const reparsed = JSON.parse(JSON.stringify(original));
expect(reparsed.msg).toBe(original.msg);
});
});
describe("HTTP parsing — llhttp SSE4.2 PCMPESTRI", () => {
test("long headers", async () => {
const longHeaderValue = Buffer.alloc(512, "v").toString();
using server = Bun.serve({
port: 0,
fetch(req) {
return new Response(req.headers.get("X-Test-Header") || "missing");
},
});
const resp = await fetch(`http://localhost:${server.port}/` + "path/".repeat(20), {
headers: {
"X-Test-Header": longHeaderValue,
"X-Header-A": Buffer.alloc(64, "a").toString(),
"X-Header-B": Buffer.alloc(64, "b").toString(),
"X-Header-C": Buffer.alloc(64, "c").toString(),
"Accept": "application/json",
"Accept-Language": "en-US,en;q=0.9,fr;q=0.8,de;q=0.7",
},
});
expect(await resp.text()).toBe(longHeaderValue);
});
});
describe("Latin-1 to UTF-8 — @Vector(16, u8) ungated", () => {
test("full byte range", () => {
const latin1Bytes = Buffer.alloc(256);
for (let i = 0; i < 256; i++) latin1Bytes[i] = i;
const latin1Str = latin1Bytes.toString("latin1");
const utf8Buf = Buffer.from(latin1Str, "utf-8");
expect(utf8Buf.length).toBeGreaterThan(256);
expect(utf8Buf.toString("utf-8").length).toBe(256);
});
});
describe("String search — Highway memMem/indexOfChar", () => {
test("indexOf long string", () => {
const haystack = Buffer.alloc(1000, "a").toString() + "needle" + Buffer.alloc(1000, "b").toString();
expect(haystack.indexOf("needle")).toBe(1000);
expect(haystack.indexOf("missing")).toBe(-1);
expect(haystack.lastIndexOf("needle")).toBe(1000);
});
test("includes long string", () => {
const haystack = Buffer.alloc(1000, "a").toString() + "needle" + Buffer.alloc(1000, "b").toString();
expect(haystack.includes("needle")).toBe(true);
expect(haystack.includes("missing")).toBe(false);
});
});

View File

@@ -2,6 +2,6 @@
{
"package": "elysia",
"repository": "https://github.com/elysiajs/elysia",
"tag": "1.4.25"
"tag": "1.4.12"
}
]