fix(compile): ensure bytecode alignment accounts for section header (#26299)

## Summary

Fixes bytecode alignment in standalone executables to prevent crashes
when loading bytecode cache on Windows.

The bytecode offset must be chosen so that, once the executable is loaded
at runtime, the bytecode pointer is 128-byte aligned. Previously, alignment
was computed from the compilation buffer's arbitrary memory address, which
didn't account for the 8-byte size header that precedes the embedded data
in its section. This left the bytecode misaligned at load time, leading to
segfaults in `JSC::CachedJSValue::decode` on Windows.

## Root Cause

At runtime, embedded data starts 8 bytes after the PE/Mach-O section
virtual address (which is page-aligned, hence 128-byte aligned). For
bytecode at offset `O` to be aligned:
```
(section_va + 8 + O) % 128 == 0
=> (8 + O) % 128 == 0
=> O % 128 == 120
```

The previous code used `std.mem.alignInSlice()` which found aligned
addresses based on the compilation buffer's arbitrary address, not
accounting for the 8-byte header offset at load time.

## Changes

- **`src/StandaloneModuleGraph.zig`**: Calculate bytecode offset to
satisfy `offset % 128 == 120` instead of using `alignInSlice`
- **`test/regression/issue/26298.test.ts`**: Added regression tests for
bytecode cache in standalone executables

## Test plan

- [x] Added regression test `test/regression/issue/26298.test.ts` with 3
test cases
- [x] Existing `HelloWorldBytecode` test passes
- [x] Build succeeds

Fixes #26298

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
robobun
2026-01-20 22:42:38 -08:00
committed by GitHub
parent bb4d150aed
commit 08103aa2ff
2 changed files with 189 additions and 7 deletions

View File

@@ -429,16 +429,49 @@ pub const StandaloneModuleGraph = struct {
const bytecode: StringPointer = brk: { const bytecode: StringPointer = brk: {
if (output_file.bytecode_index != std.math.maxInt(u32)) { if (output_file.bytecode_index != std.math.maxInt(u32)) {
// Use up to 256 byte alignment for bytecode // Bytecode alignment for JSC bytecode cache deserialization.
// Not aligning it correctly will cause a runtime assertion error, or a segfault. // Not aligning correctly causes a runtime assertion error or segfault.
//
// PLATFORM-SPECIFIC ALIGNMENT:
// - PE (Windows) and Mach-O (macOS): The module graph data is embedded in
// a dedicated section with an 8-byte size header. At runtime, the section
// is memory-mapped at a page-aligned address (hence 128-byte aligned).
// The data buffer starts 8 bytes after the section start.
// For bytecode at offset O to be 128-byte aligned:
// (section_va + 8 + O) % 128 == 0
// => O % 128 == 120
//
// - ELF (Linux): The module graph data is appended to the executable and
// read into a heap-allocated buffer at runtime. The allocator provides
// natural alignment, and there's no 8-byte section header offset.
// However, using target_mod=120 is still safe because:
// - If the buffer is 128-aligned: bytecode at offset 120 is at (128n + 120),
// which when loaded at a 128-aligned address gives proper alignment.
// - The extra 120 bytes of padding is acceptable overhead.
//
// This alignment strategy (target_mod=120) works for all platforms because
// it's the worst-case offset needed for the 8-byte header scenario.
const bytecode = output_files[output_file.bytecode_index].value.buffer.bytes; const bytecode = output_files[output_file.bytecode_index].value.buffer.bytes;
const aligned = std.mem.alignInSlice(string_builder.writable(), 128).?; const current_offset = string_builder.len;
@memcpy(aligned[0..bytecode.len], bytecode[0..bytecode.len]); // Calculate padding so that (current_offset + padding) % 128 == 120
const unaligned_space = aligned[bytecode.len..]; // This accounts for the 8-byte section header on PE/Mach-O platforms.
const offset = @intFromPtr(aligned.ptr) - @intFromPtr(string_builder.ptr.?); const target_mod: usize = 128 - @sizeOf(u64); // 120 = accounts for 8-byte header
const current_mod = current_offset % 128;
const padding = if (current_mod <= target_mod)
target_mod - current_mod
else
128 - current_mod + target_mod;
// Zero the padding bytes to ensure deterministic output
const writable = string_builder.writable();
@memset(writable[0..padding], 0);
string_builder.len += padding;
const aligned_offset = string_builder.len;
const writable_after_padding = string_builder.writable();
@memcpy(writable_after_padding[0..bytecode.len], bytecode[0..bytecode.len]);
const unaligned_space = writable_after_padding[bytecode.len..];
const len = bytecode.len + @min(unaligned_space.len, 128); const len = bytecode.len + @min(unaligned_space.len, 128);
string_builder.len += len; string_builder.len += len;
break :brk StringPointer{ .offset = @truncate(offset), .length = @truncate(len) }; break :brk StringPointer{ .offset = @truncate(aligned_offset), .length = @truncate(len) };
} else { } else {
break :brk .{}; break :brk .{};
} }

View File

@@ -0,0 +1,149 @@
import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe, isWindows, tempDir } from "harness";
import { join } from "path";
// Regression test for https://github.com/oven-sh/bun/issues/26298
// Windows segfault when running standalone executables with bytecode cache.
// The crash occurred because bytecode offsets were not properly aligned
// when embedded in PE sections, causing deserialization failures.
describe("issue #26298: bytecode cache in standalone executables", () => {
  // Compiled binaries get an explicit ".exe" suffix on Windows.
  const exeSuffix = isWindows ? ".exe" : "";

  /**
   * Runs `bun build --compile --bytecode` on `entry` (relative to `root`),
   * asserts that the build printed nothing to stderr and exited with 0,
   * and returns the path of the produced executable.
   */
  async function compileWithBytecode(root: string, entry: string, binaryName: string): Promise<string> {
    const outfile = join(root, `${binaryName}${exeSuffix}`);

    await using builder = Bun.spawn({
      cmd: [bunExe(), "build", "--compile", "--bytecode", join(root, entry), "--outfile", outfile],
      env: bunEnv,
      cwd: root,
      stdout: "pipe",
      stderr: "pipe",
    });

    // Drain both pipes while waiting for exit; stdout is read but not inspected.
    const [, builderStderr, builderExitCode] = await Promise.all([
      builder.stdout.text(),
      builder.stderr.text(),
      builder.exited,
    ]);

    expect(builderStderr).toBe("");
    expect(builderExitCode).toBe(0);

    return outfile;
  }

  /**
   * Spawns the compiled executable with the given environment and resolves
   * with its captured stdout, stderr and exit code.
   */
  async function runBinary(binary: string, env: Record<string, string | undefined> = bunEnv) {
    await using child = Bun.spawn({
      cmd: [binary],
      env,
      stdout: "pipe",
      stderr: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([child.stdout.text(), child.stderr.text(), child.exited]);
    return { stdout, stderr, exitCode };
  }

  test("standalone executable with --bytecode runs correctly", async () => {
    using dir = tempDir("bytecode-standalone", {
      "index.js": `
const add = (a, b) => a + b;
const multiply = (x, y) => x * y;
console.log("sum:", add(2, 3));
console.log("product:", multiply(4, 5));
`,
    });

    const binary = await compileWithBytecode(String(dir), "index.js", "app");
    const { stdout, exitCode } = await runBinary(binary);

    expect(stdout).toContain("sum: 5");
    expect(stdout).toContain("product: 20");
    // A segfault would surface here as a non-zero exit code.
    expect(exitCode).toBe(0);
  });

  test("standalone executable with --bytecode and multiple modules", async () => {
    using dir = tempDir("bytecode-multi-module", {
      "index.js": `
import { greet } from "./greet.js";
import { calculate } from "./math.js";
console.log(greet("World"));
console.log("result:", calculate(10, 5));
`,
      "greet.js": `
export function greet(name) {
return "Hello, " + name + "!";
}
`,
      "math.js": `
export function calculate(a, b) {
return a * b + (a - b);
}
`,
    });

    const binary = await compileWithBytecode(String(dir), "index.js", "multi");
    const { stdout, exitCode } = await runBinary(binary);

    expect(stdout).toContain("Hello, World!");
    expect(stdout).toContain("result: 55");
    // A segfault would surface here as a non-zero exit code.
    expect(exitCode).toBe(0);
  });

  test("standalone executable with --bytecode uses bytecode cache", async () => {
    using dir = tempDir("bytecode-cache-hit", {
      "app.js": `console.log("bytecode cache test");`,
    });

    const binary = await compileWithBytecode(String(dir), "app.js", "cached");

    // Enable JSC's verbose disk-cache diagnostics so a cache hit is reported on stderr.
    const { stdout, stderr, exitCode } = await runBinary(binary, {
      ...bunEnv,
      BUN_JSC_verboseDiskCache: "1",
    });

    expect(stdout).toContain("bytecode cache test");
    // The cache-hit line is the evidence that the embedded bytecode was loaded.
    // The pattern is deliberately loose so small changes to the diagnostic
    // wording do not break this test.
    expect(stderr).toMatch(/\[Disk Cache\].*Cache hit/i);
    expect(exitCode).toBe(0);
  });
});