diff --git a/src/StandaloneModuleGraph.zig b/src/StandaloneModuleGraph.zig index 070b459eca..b4123564cd 100644 --- a/src/StandaloneModuleGraph.zig +++ b/src/StandaloneModuleGraph.zig @@ -429,16 +429,49 @@ pub const StandaloneModuleGraph = struct { const bytecode: StringPointer = brk: { if (output_file.bytecode_index != std.math.maxInt(u32)) { - // Use up to 256 byte alignment for bytecode - // Not aligning it correctly will cause a runtime assertion error, or a segfault. + // Bytecode alignment for JSC bytecode cache deserialization. + // Not aligning correctly causes a runtime assertion error or segfault. + // + // PLATFORM-SPECIFIC ALIGNMENT: + // - PE (Windows) and Mach-O (macOS): The module graph data is embedded in + // a dedicated section with an 8-byte size header. At runtime, the section + // is memory-mapped at a page-aligned address (hence 128-byte aligned). + // The data buffer starts 8 bytes after the section start. + // For bytecode at offset O to be 128-byte aligned: + // (section_va + 8 + O) % 128 == 0 + // => O % 128 == 120 + // + // - ELF (Linux): The module graph data is appended to the executable and + // read into a heap-allocated buffer at runtime. The allocator provides + // natural alignment, and there's no 8-byte section header offset. + // However, using target_mod=120 is still safe because: + // - If the buffer is 128-aligned: bytecode at offset 120 is at (128n + 120), + // which when loaded at a 128-aligned address gives proper alignment. + // - The extra 120 bytes of padding is acceptable overhead. + // + // This alignment strategy (target_mod=120) works for all platforms because + // it's the worst-case offset needed for the 8-byte header scenario. const bytecode = output_files[output_file.bytecode_index].value.buffer.bytes; - const aligned = std.mem.alignInSlice(string_builder.writable(), 128).?; - @memcpy(aligned[0..bytecode.len], bytecode[0..bytecode.len]); - const unaligned_space = aligned[bytecode.len..]; - const offset = @intFromPtr(aligned.ptr) - @intFromPtr(string_builder.ptr.?); + const current_offset = string_builder.len; + // Calculate padding so that (current_offset + padding) % 128 == 120 + // This accounts for the 8-byte section header on PE/Mach-O platforms. + const target_mod: usize = 128 - @sizeOf(u64); // 120 = accounts for 8-byte header + const current_mod = current_offset % 128; + const padding = if (current_mod <= target_mod) + target_mod - current_mod + else + 128 - current_mod + target_mod; + // Zero the padding bytes to ensure deterministic output + const writable = string_builder.writable(); + @memset(writable[0..padding], 0); + string_builder.len += padding; + const aligned_offset = string_builder.len; + const writable_after_padding = string_builder.writable(); + @memcpy(writable_after_padding[0..bytecode.len], bytecode[0..bytecode.len]); + const unaligned_space = writable_after_padding[bytecode.len..]; const len = bytecode.len + @min(unaligned_space.len, 128); string_builder.len += len; - break :brk StringPointer{ .offset = @truncate(offset), .length = @truncate(len) }; + break :brk StringPointer{ .offset = @truncate(aligned_offset), .length = @truncate(len) }; } else { break :brk .{}; } diff --git a/test/regression/issue/26298.test.ts b/test/regression/issue/26298.test.ts new file mode 100644 index 0000000000..3a1b36dd4f --- /dev/null +++ b/test/regression/issue/26298.test.ts @@ -0,0 +1,149 @@ +import { describe, expect, test } from "bun:test"; +import { bunEnv, bunExe, isWindows, tempDir } from "harness"; +import { join } from "path"; + +// Regression test for https://github.com/oven-sh/bun/issues/26298 +// Windows segfault when running standalone executables with bytecode cache. +// The crash occurred because bytecode offsets were not properly aligned +// when embedded in PE sections, causing deserialization failures. + +describe("issue #26298: bytecode cache in standalone executables", () => { + const ext = isWindows ? ".exe" : ""; + + test("standalone executable with --bytecode runs correctly", async () => { + using dir = tempDir("bytecode-standalone", { + "index.js": ` + const add = (a, b) => a + b; + const multiply = (x, y) => x * y; + console.log("sum:", add(2, 3)); + console.log("product:", multiply(4, 5)); + `, + }); + + const outfile = join(String(dir), `app${ext}`); + + // Build with bytecode + await using build = Bun.spawn({ + cmd: [bunExe(), "build", "--compile", "--bytecode", join(String(dir), "index.js"), "--outfile", outfile], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [, buildStderr, buildExitCode] = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]); + + expect(buildStderr).toBe(""); + expect(buildExitCode).toBe(0); + + // Run the compiled executable + await using exe = Bun.spawn({ + cmd: [outfile], + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [exeStdout, , exeExitCode] = await Promise.all([exe.stdout.text(), exe.stderr.text(), exe.exited]); + + expect(exeStdout).toContain("sum: 5"); + expect(exeStdout).toContain("product: 20"); + // Should not crash with segfault + expect(exeExitCode).toBe(0); + }); + + test("standalone executable with --bytecode and multiple modules", async () => { + using dir = tempDir("bytecode-multi-module", { + "index.js": ` + import { greet } from "./greet.js"; + import { calculate } from "./math.js"; + console.log(greet("World")); + console.log("result:", calculate(10, 5)); + `, + "greet.js": ` + export function greet(name) { + return "Hello, " + name + "!"; + } + `, + "math.js": ` + export function calculate(a, b) { + return a * b + (a - b); + } + `, + }); + + const outfile = join(String(dir), `multi${ext}`); + + // Build with bytecode + await using build = Bun.spawn({ + cmd: [bunExe(), "build", "--compile", "--bytecode", join(String(dir), "index.js"), "--outfile", outfile], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [, buildStderr, buildExitCode] = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]); + + expect(buildStderr).toBe(""); + expect(buildExitCode).toBe(0); + + // Run the compiled executable + await using exe = Bun.spawn({ + cmd: [outfile], + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [exeStdout, , exeExitCode] = await Promise.all([exe.stdout.text(), exe.stderr.text(), exe.exited]); + + expect(exeStdout).toContain("Hello, World!"); + expect(exeStdout).toContain("result: 55"); + // Should not crash with segfault + expect(exeExitCode).toBe(0); + }); + + test("standalone executable with --bytecode uses bytecode cache", async () => { + using dir = tempDir("bytecode-cache-hit", { + "app.js": `console.log("bytecode cache test");`, + }); + + const outfile = join(String(dir), `cached${ext}`); + + // Build with bytecode + await using build = Bun.spawn({ + cmd: [bunExe(), "build", "--compile", "--bytecode", join(String(dir), "app.js"), "--outfile", outfile], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [, buildStderr, buildExitCode] = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]); + + expect(buildStderr).toBe(""); + expect(buildExitCode).toBe(0); + + // Run with verbose disk cache to verify bytecode is being used + await using exe = Bun.spawn({ + cmd: [outfile], + env: { + ...bunEnv, + BUN_JSC_verboseDiskCache: "1", + }, + stdout: "pipe", + stderr: "pipe", + }); + + const [exeStdout, exeStderr, exeExitCode] = await Promise.all([exe.stdout.text(), exe.stderr.text(), exe.exited]); + + expect(exeStdout).toContain("bytecode cache test"); + // Check for cache hit message which confirms bytecode is being loaded. + // This relies on JSC's internal disk cache diagnostic output when + // BUN_JSC_verboseDiskCache=1 is set. The pattern is kept flexible to + // accommodate potential future changes in JSC's diagnostic format. + expect(exeStderr).toMatch(/\[Disk Cache\].*Cache hit/i); + expect(exeExitCode).toBe(0); + }); +});