Compare commits

...

18 Commits

Author SHA1 Message Date
Jarred Sumner  f1c3375108  fix  2024-12-09 08:34:40 +01:00
Jarred Sumner  592c06213e  .bun section  2024-12-09 08:26:30 +01:00
Jarred Sumner  a537a995b5  do elf version  2024-12-08 22:55:59 -08:00
Jarred Sumner  34777c926d  Closer but not working yet. Some kind of alignment issue.  2024-12-08 09:34:06 -08:00
Jarred Sumner  fb6b1e8765  Update macho.zig  2024-12-08 07:48:26 -08:00
Jarred Sumner  a2b179f685  Update macho.zig  2024-12-08 07:35:15 -08:00
Jarred Sumner  12e1231144  Update macho.zig  2024-12-08 07:20:37 -08:00
Jarred Sumner  a598a44f57  Further  2024-12-08 05:33:46 -08:00
Jarred Sumner  9683a247c5  Update macho.zig  2024-12-08 04:44:25 -08:00
Jarred Sumner  aee40c4478  wip  2024-12-08 04:37:16 -08:00
Jarred Sumner  d8951cf1aa  Update .gitignore  2024-12-08 04:36:42 -08:00
Jarred Sumner  d7e88d6205  Merge branch 'main' into jarred/macho  2024-12-08 02:18:17 -08:00
Jarred Sumner  4ef87e950a  Merge branch 'main' into jarred/macho  2024-12-05 02:34:33 -08:00
Jarred Sumner  7c034acc5d  its starting to work  2024-12-01 13:01:59 -08:00
Jarred Sumner  413c46729c  Update macho.zig  2024-12-01 12:39:57 -08:00
Jarred Sumner  413c8232dd  Update macho.zig  2024-12-01 12:30:24 -08:00
Jarred Sumner  dc9ec11671  Update macho.zig  2024-12-01 12:30:08 -08:00
Jarred Sumner  5e738f8b48  wip  2024-12-01 12:29:40 -08:00
8 changed files with 1097 additions and 84 deletions

3
.gitignore vendored
View File

@@ -176,4 +176,5 @@ test/js/third_party/prisma/prisma/sqlite/dev.db-journal
.buildkite/ci.yml
*.sock
scratch*.{js,ts,tsx,cjs,mjs}
scratch*.{js,ts,tsx,cjs,mjs}
*.bun-build

View File

@@ -12,6 +12,7 @@ const Syscall = bun.sys;
const SourceMap = bun.sourcemap;
const StringPointer = bun.StringPointer;
const macho = bun.macho;
const w = std.os.windows;
pub const StandaloneModuleGraph = struct {
@@ -96,6 +97,35 @@ pub const StandaloneModuleGraph = struct {
cjs = 2,
};
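// Descriptive note (not part of the commit): the compiled module graph is embedded in a
// dedicated section of the executable — "__BUN,__bun" in Mach-O binaries and ".bun" in ELF
// binaries. The section begins with a little-endian u32 byte length followed by the payload,
// so the getData() helpers below skip the 4-byte header and return exactly `length` bytes.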
const Macho = struct {
pub extern "C" fn Bun__getStandaloneModuleGraphMachoLength() ?*align(1) u32;
pub fn getData() ?[]const u8 {
if (Bun__getStandaloneModuleGraphMachoLength()) |length| {
if (length.* < 8) {
return null;
}
const slice_ptr: [*]const u8 = @ptrCast(length);
return slice_ptr[4..][0..length.*];
}
return null;
}
};
const ELF = struct {
pub extern "C" fn Bun__getStandaloneModuleGraphElfLength() ?*align(1) u32;
pub fn getData() ?[]const u8 {
if (Bun__getStandaloneModuleGraphElfLength()) |length| {
const slice_ptr: [*]const u8 = @ptrCast(length);
return slice_ptr[4..][0..length.*];
}
return null;
}
};
pub const File = struct {
name: []const u8 = "",
loader: bun.options.Loader,
@@ -430,7 +460,7 @@ pub const StandaloneModuleGraph = struct {
else
std.mem.page_size;
pub fn inject(bytes: []const u8, self_exe: [:0]const u8) bun.FileDescriptor {
pub fn inject(bytes: []const u8, self_exe: [:0]const u8, target: *const CompileTarget) bun.FileDescriptor {
var buf: bun.PathBuffer = undefined;
var zname: [:0]const u8 = bun.span(bun.fs.FileSystem.instance.tmpname("bun-build", &buf, @as(u64, @bitCast(std.time.milliTimestamp()))) catch |err| {
Output.prettyErrorln("<r><red>error<r><d>:<r> failed to get temporary file name: {s}", .{@errorName(err)});
@@ -439,8 +469,8 @@ pub const StandaloneModuleGraph = struct {
const cleanup = struct {
pub fn toClean(name: [:0]const u8, fd: bun.FileDescriptor) void {
_ = Syscall.close(fd);
_ = Syscall.unlink(name);
_ = Syscall.close(fd);
}
}.toClean;
@@ -565,74 +595,167 @@ pub const StandaloneModuleGraph = struct {
break :brk fd;
};
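// Descriptive note (not part of the commit): how the module graph is injected depends on the target:
//   .mac   -> rewrite the Mach-O "__BUN,__bun" section in memory, then re-sign and write the binary
//   .linux -> rewrite the ELF ".bun" section and write the whole file back
//   else   -> append the bytes to the end of the executable, followed by an 8-byte length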
switch (target.os) {
    .mac => {
        const input_result = bun.sys.File.readToEnd(.{ .handle = cloned_executable_fd }, bun.default_allocator);
        if (input_result.err) |err| {
            Output.prettyErrorln("Error reading standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        }
        var macho_file = bun.macho.MachoFile.init(bun.default_allocator, input_result.bytes.items, bytes.len) catch |err| {
            Output.prettyErrorln("Error initializing standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        };
        defer macho_file.deinit();
        macho_file.writeSection(bytes) catch |err| {
            Output.prettyErrorln("Error writing standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        };
        input_result.bytes.deinit();

        switch (Syscall.setFileOffset(cloned_executable_fd, 0)) {
            .err => |err| {
                Output.prettyErrorln("Error seeking to start of temporary file: {}", .{err});
                cleanup(zname, cloned_executable_fd);
                Global.exit(1);
            },
            else => {},
        }

        var file = bun.sys.File{ .handle = cloned_executable_fd };
        const writer = file.writer();
        const BufferedWriter = std.io.BufferedWriter(512 * 1024, @TypeOf(writer));
        var buffered_writer = bun.default_allocator.create(BufferedWriter) catch bun.outOfMemory();
        buffered_writer.* = .{
            .unbuffered_writer = writer,
        };
        macho_file.buildAndSign(buffered_writer.writer()) catch |err| {
            Output.prettyErrorln("Error writing standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        };
        buffered_writer.flush() catch |err| {
            Output.prettyErrorln("Error flushing standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        };

        if (comptime !Environment.isWindows) {
            _ = bun.C.fchmod(cloned_executable_fd.int(), 0o777);
        }

        return cloned_executable_fd;
    },
    .linux => {
        var input_result = bun.sys.File.readToEnd(.{ .handle = cloned_executable_fd }, bun.default_allocator);
        if (input_result.err) |err| {
            Output.prettyErrorln("Error reading standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        }
        defer input_result.bytes.deinit();
        const elf_file = bun.elf.embedBinaryData(bun.default_allocator, input_result.bytes.items, bytes) catch |err| {
            Output.prettyErrorln("Error embedding standalone module graph: {}", .{err});
            cleanup(zname, cloned_executable_fd);
            Global.exit(1);
        };
        defer bun.default_allocator.free(elf_file);

        switch (Syscall.setFileOffset(cloned_executable_fd, 0)) {
            .err => |err| {
                Output.prettyErrorln("Error seeking to start of temporary file: {}", .{err});
                cleanup(zname, cloned_executable_fd);
                Global.exit(1);
            },
            else => {},
        }

        switch (bun.sys.File.writeAll(.{ .handle = cloned_executable_fd }, elf_file)) {
            .err => |err| {
                Output.prettyErrorln("Error writing standalone module graph: {}", .{err});
                cleanup(zname, cloned_executable_fd);
                Global.exit(1);
            },
            else => {},
        }

        if (comptime !Environment.isWindows) {
            _ = bun.C.fchmod(cloned_executable_fd.int(), 0o777);
        }

        return cloned_executable_fd;
    },
    else => {
        var total_byte_count: usize = undefined;

        if (Environment.isWindows) {
            total_byte_count = bytes.len + 8 + (Syscall.setFileOffsetToEndWindows(cloned_executable_fd).unwrap() catch |err| {
                Output.prettyErrorln("<r><red>error<r><d>:<r> failed to seek to end of temporary file\n{}", .{err});
                cleanup(zname, cloned_executable_fd);
                Global.exit(1);
            });
        } else {
            const seek_position = @as(u64, @intCast(brk: {
                const fstat = switch (Syscall.fstat(cloned_executable_fd)) {
                    .result => |res| res,
                    .err => |err| {
                        Output.prettyErrorln("{}", .{err});
                        cleanup(zname, cloned_executable_fd);
                        Global.exit(1);
                    },
                };

                break :brk @max(fstat.size, 0);
            }));

            total_byte_count = seek_position + bytes.len + 8;

            // From https://man7.org/linux/man-pages/man2/lseek.2.html
            //
            // lseek() allows the file offset to be set beyond the end of the
            // file (but this does not change the size of the file). If data is
            // later written at this point, subsequent reads of the data in the
            // gap (a "hole") return null bytes ('\0') until data is actually
            // written into the gap.
            //
            switch (Syscall.setFileOffset(cloned_executable_fd, seek_position)) {
                .err => |err| {
                    Output.prettyErrorln(
                        "{}\nwhile seeking to end of temporary file (pos: {d})",
                        .{
                            err,
                            seek_position,
                        },
                    );
                    cleanup(zname, cloned_executable_fd);
                    Global.exit(1);
                },
                else => {},
            }
        }

        var remain = bytes;
        while (remain.len > 0) {
            switch (Syscall.write(cloned_executable_fd, bytes)) {
                .result => |written| remain = remain[written..],
                .err => |err| {
                    Output.prettyErrorln("<r><red>error<r><d>:<r> failed to write to temporary file\n{}", .{err});
                    cleanup(zname, cloned_executable_fd);
                    Global.exit(1);
                },
            }
        }

        // the final 8 bytes in the file are the length of the module graph with padding, excluding the trailer and offsets
        _ = Syscall.write(cloned_executable_fd, std.mem.asBytes(&total_byte_count));

        if (comptime !Environment.isWindows) {
            _ = bun.C.fchmod(cloned_executable_fd.int(), 0o777);
        }

        return cloned_executable_fd;
    },
}
}
pub const CompileTarget = @import("./compile_target.zig");
@@ -675,6 +798,7 @@ pub const StandaloneModuleGraph = struct {
Output.err(err, "failed to download cross-compiled bun executable", .{});
Global.exit(1);
},
target,
);
fd.assertKind(.system);
@@ -708,29 +832,6 @@ pub const StandaloneModuleGraph = struct {
Global.exit(1);
};
if (comptime Environment.isMac) {
if (target.os == .mac) {
var signer = std.process.Child.init(
&.{
"codesign",
"--remove-signature",
temp_location,
},
bun.default_allocator,
);
if (bun.logger.Log.default_log_level.atLeast(.verbose)) {
signer.stdout_behavior = .Inherit;
signer.stderr_behavior = .Inherit;
signer.stdin_behavior = .Inherit;
} else {
signer.stdout_behavior = .Ignore;
signer.stderr_behavior = .Ignore;
signer.stdin_behavior = .Ignore;
}
_ = signer.spawnAndWait() catch {};
}
}
bun.C.moveFileZWithHandle(
fd,
bun.FD.cwd(),
@@ -752,6 +853,37 @@ pub const StandaloneModuleGraph = struct {
}
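// Descriptive note (not part of the commit): at startup, fromExecutable() looks for an embedded
// module graph. On Linux and macOS it reads the dedicated ELF/Mach-O section via the helpers
// above; the last bytes of that payload are an Offsets struct followed by the trailer magic,
// which is validated before decoding. Other platforms fall back to reading the bytes appended
// to the executable file itself.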
pub fn fromExecutable(allocator: std.mem.Allocator) !?StandaloneModuleGraph {
if (comptime Environment.isLinux) {
const elf_bytes = ELF.getData() orelse return null;
if (elf_bytes.len < @sizeOf(Offsets) + trailer.len) {
Output.debugWarn("bun standalone module graph is too small to be valid", .{});
return null;
}
const elf_bytes_slice = elf_bytes[elf_bytes.len - @sizeOf(Offsets) - trailer.len ..];
const trailer_bytes = elf_bytes[elf_bytes.len - trailer.len ..][0..trailer.len];
if (!bun.strings.eqlComptime(trailer_bytes, trailer)) {
Output.debugWarn("bun standalone module graph has invalid trailer", .{});
return null;
}
const offsets = std.mem.bytesAsValue(Offsets, elf_bytes_slice).*;
return try StandaloneModuleGraph.fromBytes(allocator, @constCast(elf_bytes), offsets);
}
if (comptime Environment.isMac) {
const macho_bytes = Macho.getData() orelse return null;
if (macho_bytes.len < @sizeOf(Offsets) + trailer.len) {
Output.debugWarn("bun standalone module graph is too small to be valid", .{});
return null;
}
const macho_bytes_slice = macho_bytes[macho_bytes.len - @sizeOf(Offsets) - trailer.len ..];
const trailer_bytes = macho_bytes[macho_bytes.len - trailer.len ..][0..trailer.len];
if (!bun.strings.eqlComptime(trailer_bytes, trailer)) {
Output.debugWarn("bun standalone module graph has invalid trailer", .{});
return null;
}
const offsets = std.mem.bytesAsValue(Offsets, macho_bytes_slice).*;
return try StandaloneModuleGraph.fromBytes(allocator, @constCast(macho_bytes), offsets);
}
// Do not invoke libuv here.
const self_exe = openSelf() catch return null;
defer _ = Syscall.close(self_exe);

View File

@@ -860,3 +860,45 @@ extern "C" void Bun__unregisterSignalsForForwarding()
}
#endif
#if OS(DARWIN)
#if CPU(ARM64)
#define BLOB_HEADER_ALIGNMENT 16 * 1024
#else
#define BLOB_HEADER_ALIGNMENT 4 * 1024
#endif
extern "C" {
struct BlobHeader {
uint32_t size;
uint8_t data[];
} __attribute__((aligned(BLOB_HEADER_ALIGNMENT)));
}
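// Descriptive note (not part of the commit): BUN_COMPILED is emitted into the "__BUN,__bun"
// section with a zero size. When a standalone executable is produced, the injector (macho.zig)
// rewrites that section so that `size` holds the little-endian byte length of the embedded
// module graph and `data` holds the bytes; Bun__getStandaloneModuleGraphMachoLength() hands the
// Zig side a pointer to the size field, which is also the start of the blob.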
extern "C" BlobHeader __attribute__((section("__BUN,__bun"))) BUN_COMPILED = { 0, 0 };
extern "C" uint32_t* Bun__getStandaloneModuleGraphMachoLength()
{
return &BUN_COMPILED.size;
}
#endif
#if OS(LINUX)
#define BLOB_HEADER_ALIGNMENT 4 * 1024
extern "C" {
struct BlobHeader {
uint32_t size;
uint8_t data[];
} __attribute__((aligned(BLOB_HEADER_ALIGNMENT)));
}
extern "C" BlobHeader __attribute__((section(".bun"), used)) BUN_COMPILED_ELF = { 0, 0 };
extern "C" uint32_t* Bun__getStandaloneModuleGraphElfLength()
{
return &BUN_COMPILED_ELF.size;
}
#endif

View File

@@ -4118,3 +4118,6 @@ pub inline fn isComptimeKnown(x: anytype) bool {
pub inline fn itemOrNull(comptime T: type, slice: []const T, index: usize) ?T {
return if (index < slice.len) slice[index] else null;
}
pub const macho = @import("./macho.zig");
pub const elf = @import("./elf.zig");

View File

@@ -491,6 +491,15 @@ pub extern fn set_process_priority(pid: c_uint, priority: c_int) i32;
pub extern fn strncasecmp(s1: [*]const u8, s2: [*]const u8, n: usize) i32;
pub extern fn memmove(dest: [*]u8, src: [*]const u8, n: usize) void;
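// Descriptive note (not part of the commit): move() is a checked wrapper around libc memmove.
// Unlike @memcpy, the source and destination may overlap, which macho.zig relies on when
// shifting the bytes that follow the resized __BUN segment.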
pub fn move(dest: []u8, src: []const u8) void {
if (comptime Environment.allow_assert) {
if (src.len != dest.len) {
bun.Output.panic("Move: src.len != dest.len, {d} != {d}", .{ src.len, dest.len });
}
}
memmove(dest.ptr, src.ptr, src.len);
}
// https://man7.org/linux/man-pages/man3/fmod.3.html
pub extern fn fmod(f64, f64) f64;

252
src/elf.zig Normal file
View File

@@ -0,0 +1,252 @@
const std = @import("std");
const mem = std.mem;
const fs = std.fs;
const io = std.io;
const macho = std.macho;
const Allocator = mem.Allocator;
const bun = @import("root").bun;
const elf = std.elf;
pub const BlobAlignment = 4 * 1024;
pub const ElfError = error{
InvalidElfFile,
SectionNotFound,
SymbolNotFound,
InvalidSectionType,
NotEnoughSpace,
InvalidAlignment,
SectionHasRelocations,
SectionInGroup,
CompressedSectionNotSupported,
InvalidSectionFlags,
} || Allocator.Error;
/// Embeds binary data into an ELF executable by modifying the ".bun" section
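/// (Descriptive note, not part of the commit.) The rewritten section stores a little-endian
/// u32 length followed by `data_to_embed`, rounded up to the section's alignment. If the
/// section has to grow, every later file offset and virtual address (section headers, program
/// headers, dynamic entries, symbols, relocations) is shifted by the size difference.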
pub fn embedBinaryData(allocator: Allocator, input_elf: []const u8, data_to_embed: []const u8) ElfError![]u8 {
// Parse ELF header
if (input_elf.len < @sizeOf(elf.Elf64_Ehdr)) {
return error.InvalidElfFile;
}
const elf_header = @as(*align(1) const elf.Elf64_Ehdr, @ptrCast(input_elf.ptr)).*;
// Verify ELF magic
if (!mem.eql(u8, elf_header.e_ident[0..4], "\x7fELF")) {
return error.InvalidElfFile;
}
// Parse section headers
const sh_offset = elf_header.e_shoff;
const sh_size = @as(usize, @intCast(elf_header.e_shentsize)) * @as(usize, @intCast(elf_header.e_shnum));
if (sh_offset + sh_size > input_elf.len) {
return error.InvalidElfFile;
}
const sections = @as([*]const elf.Elf64_Shdr, @alignCast(@ptrCast(input_elf.ptr + sh_offset)))[0..elf_header.e_shnum];
// Find string table section
const strtab = sections[elf_header.e_shstrndx];
const strtab_data = input_elf[strtab.sh_offset..][0..strtab.sh_size];
// Find .bun section
var bun_data_section: ?*elf.Elf64_Shdr = null;
for (sections) |*section| {
// Validate the name offset before dereferencing it into the string table.
if (section.sh_name >= strtab_data.len) {
return error.InvalidElfFile;
}
const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab_data.ptr + section.sh_name)), 0);
if (mem.eql(u8, name, ".bun")) {
bun_data_section = @constCast(section);
break;
}
}
const data_section = bun_data_section orelse return error.SectionNotFound;
// Verify section is writable and has enough space
if (data_section.sh_type != elf.SHT_PROGBITS) {
return error.InvalidSectionType;
}
// Check section flags - it should be writable and allocated
if ((data_section.sh_flags & (elf.SHF_WRITE | elf.SHF_ALLOC)) != (elf.SHF_WRITE | elf.SHF_ALLOC)) {
return error.InvalidSectionFlags;
}
const required_size = mem.alignForward(usize, @sizeOf(u32) + data_to_embed.len, @max(BlobAlignment, data_section.sh_addralign));
// Calculate new file size if we need to expand
const size_difference = if (data_section.sh_size < required_size)
required_size - data_section.sh_size
else
0;
// Create output buffer with potentially increased size
const output = try allocator.alloc(u8, input_elf.len + size_difference);
errdefer allocator.free(output);
// Copy everything up to the section that needs expansion
@memcpy(output[0..data_section.sh_offset], input_elf[0..data_section.sh_offset]);
// Write our data
const out_ptr = @as([*]u8, @ptrCast(output.ptr + data_section.sh_offset));
mem.writeInt(u32, out_ptr[0..4], @as(u32, @intCast(data_to_embed.len)), .little);
@memcpy(out_ptr[4..][0..data_to_embed.len], data_to_embed);
// If we didn't need to expand, copy the rest of the file
if (size_difference == 0) {
const remaining_offset = data_section.sh_offset + data_section.sh_size;
@memcpy(
output[remaining_offset..],
input_elf[remaining_offset..],
);
return output;
}
// If we expanded, we need to:
// 1. Update section header for .bun
const output_sections = @as([*]elf.Elf64_Shdr, @alignCast(@ptrCast(output.ptr + elf_header.e_shoff)))[0..elf_header.e_shnum];
// 2. Copy remaining sections and adjust their offsets
const current_offset = data_section.sh_offset + required_size;
const section_end = data_section.sh_offset + data_section.sh_size;
// Copy remaining file contents with adjusted offsets
@memcpy(
output[current_offset..][0..input_elf[section_end..].len],
input_elf[section_end..],
);
// Find and update our section in the output buffer
for (output_sections) |*section| {
const name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab_data.ptr + section.sh_name)), 0);
if (mem.eql(u8, name, ".bun")) {
section.sh_size = required_size;
break;
}
}
// 3. Update section headers that come after our modified section
for (output_sections) |*section| {
if (section.sh_offset > data_section.sh_offset) {
section.sh_offset += size_difference;
}
}
// 4. Update ELF header if section header table was moved
if (elf_header.e_shoff > data_section.sh_offset) {
const output_header = @as(*align(1) elf.Elf64_Ehdr, @ptrCast(output.ptr));
output_header.e_shoff += size_difference;
}
// Update program headers if needed
const ph_offset = elf_header.e_phoff;
const ph_size = @as(usize, @intCast(elf_header.e_phentsize)) * @as(usize, @intCast(elf_header.e_phnum));
if (ph_offset + ph_size > input_elf.len) {
return error.InvalidElfFile;
}
const phdrs = @as([*]elf.Elf64_Phdr, @alignCast(@ptrCast(output.ptr + ph_offset)))[0..elf_header.e_phnum];
// Update any program headers that contain our section
for (phdrs) |*phdr| {
const segment_end = phdr.p_offset + phdr.p_filesz;
if (phdr.p_type == elf.PT_LOAD and
data_section.sh_offset >= phdr.p_offset and
data_section.sh_offset < segment_end)
{
// Update segment size if it contains our modified section
if (size_difference > 0) {
phdr.p_filesz += size_difference;
phdr.p_memsz += size_difference;
}
// Check alignment requirements
const new_size = phdr.p_offset + phdr.p_filesz + size_difference;
if (new_size % phdr.p_align != 0) {
return error.InvalidAlignment;
}
} else if (phdr.p_offset > data_section.sh_offset) {
// Adjust offset for segments that come after our section
phdr.p_offset += size_difference;
}
}
// Update virtual addresses for affected sections
for (output_sections) |*section| {
if (section.sh_addr > data_section.sh_addr) {
section.sh_addr += size_difference;
}
}
// Update virtual addresses in program headers
for (phdrs) |*phdr| {
if (phdr.p_vaddr > data_section.sh_addr) {
phdr.p_vaddr += size_difference;
phdr.p_paddr += size_difference;
}
}
// Find and update dynamic section if present
for (output_sections) |*section| {
if (section.sh_type == elf.SHT_DYNAMIC) {
const dynamic = @as([*]elf.Elf64_Dyn, @alignCast(@ptrCast(output.ptr + section.sh_offset)))[0..@divExact(section.sh_size, @sizeOf(elf.Elf64_Dyn))];
for (dynamic) |*dyn| {
// Update dynamic entries that contain file offsets
switch (dyn.d_tag) {
elf.DT_STRTAB, elf.DT_SYMTAB, elf.DT_RELA, elf.DT_REL, elf.DT_JMPREL, elf.DT_VERNEED, elf.DT_VERSYM => {
if (dyn.d_val > data_section.sh_offset) {
dyn.d_val += size_difference;
}
},
else => {},
}
}
}
}
// Find and update symbol tables
for (output_sections) |*section| {
if (section.sh_type == elf.SHT_SYMTAB or section.sh_type == elf.SHT_DYNSYM) {
const symbols = @as([*]elf.Elf64_Sym, @alignCast(@ptrCast(output.ptr + section.sh_offset)))[0..@divExact(section.sh_size, @sizeOf(elf.Elf64_Sym))];
for (symbols) |*sym| {
if (sym.st_value > data_section.sh_addr) {
sym.st_value += size_difference;
}
}
}
}
// Update relocations
for (output_sections) |*section| {
if (section.sh_type == elf.SHT_RELA) {
const relocations = @as([*]elf.Elf64_Rela, @alignCast(@ptrCast(output.ptr + section.sh_offset)))[0..@divExact(section.sh_size, @sizeOf(elf.Elf64_Rela))];
for (relocations) |*rela| {
if (rela.r_offset > data_section.sh_addr) {
rela.r_offset += size_difference;
}
}
}
}
// Update section groups if present
for (output_sections) |*section| {
if (section.sh_type == elf.SHT_GROUP) {
const group_members = @as([*]u32, @alignCast(@ptrCast(output.ptr + section.sh_offset)))[0..@divExact(section.sh_size, @sizeOf(u32))];
// Skip the flags word at the start
for (group_members[1..]) |*member| {
const member_section = output_sections[member.*];
if (member_section.sh_addr > data_section.sh_addr) {
member.* += @truncate(size_difference);
}
}
}
}
return output;
}

574
src/macho.zig Normal file
View File

@@ -0,0 +1,574 @@
const std = @import("std");
const mem = std.mem;
const fs = std.fs;
const io = std.io;
const macho = std.macho;
const Allocator = mem.Allocator;
const bun = @import("root").bun;
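// Descriptive note (not part of the commit): Mach-O segment and section names are fixed
// 16-byte, NUL-padded fields, so "__BUN" / "__bun" below are spelled out with explicit padding.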
pub const SEGNAME_BUN = "__BUN\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00".*;
pub const SECTNAME = "__bun\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00".*;
const strings = bun.strings;
pub const MachoFile = struct {
header: macho.mach_header_64,
data: std.ArrayList(u8),
segment: macho.segment_command_64,
section: macho.section_64,
allocator: Allocator,
const LoadCommand = struct {
cmd: u32,
cmdsize: u32,
offset: usize,
};
pub fn init(allocator: Allocator, obj_file: []const u8, blob_to_embed_length: usize) !*MachoFile {
var data = try std.ArrayList(u8).initCapacity(allocator, obj_file.len + blob_to_embed_length);
try data.appendSlice(obj_file);
const header: *const macho.mach_header_64 = @alignCast(@ptrCast(data.items.ptr));
const self = try allocator.create(MachoFile);
errdefer allocator.destroy(self);
self.* = .{
.header = header.*,
.data = data,
.segment = std.mem.zeroes(macho.segment_command_64),
.section = std.mem.zeroes(macho.section_64),
.allocator = allocator,
};
return self;
}
pub fn deinit(self: *MachoFile) void {
self.data.deinit();
self.allocator.destroy(self);
}
pub fn writeSection(self: *MachoFile, data: []const u8) !void {
const blob_alignment: u64 = if (self.header.cputype == macho.CPU_TYPE_ARM64)
16 * 1024
else
4 * 1024;
const header_size = @sizeOf(u32);
const total_size = header_size + data.len;
const aligned_size = alignSize(total_size, blob_alignment);
// Look for existing __BUN,__BUN section
var original_fileoff: u64 = 0;
var original_vmaddr: u64 = 0;
var original_data_end: u64 = 0;
var original_segsize: u64 = blob_alignment;
// Use an index instead of a pointer to avoid issues with resizing the arraylist later.
var code_sign_cmd_idx: ?usize = null;
var linkedit_seg_idx: ?usize = null;
var found_bun = false;
var iter = self.iterator();
while (iter.next()) |entry| {
const cmd = entry.hdr;
switch (cmd.cmd) {
.SEGMENT_64 => {
const command = entry.cast(macho.segment_command_64).?;
if (strings.eqlComptime(command.segName(), "__BUN")) {
if (command.nsects > 0) {
const section_offset = @intFromPtr(entry.data.ptr) - @intFromPtr(self.data.items.ptr);
const sections = @as([*]macho.section_64, @ptrCast(@alignCast(&self.data.items[section_offset + @sizeOf(macho.segment_command_64)])))[0..command.nsects];
for (sections) |*sect| {
if (strings.eqlComptime(sect.sectName(), "__bun")) {
found_bun = true;
original_fileoff = sect.offset;
original_vmaddr = sect.addr;
original_data_end = original_fileoff + blob_alignment;
original_segsize = sect.size;
self.segment = command;
self.section = sect.*;
// Update segment with proper sizes and alignment
self.segment.vmsize = alignVmsize(aligned_size, blob_alignment);
self.segment.filesize = aligned_size;
self.segment.maxprot = macho.PROT.READ | macho.PROT.WRITE;
self.segment.initprot = macho.PROT.READ | macho.PROT.WRITE;
self.section = .{
.sectname = SECTNAME,
.segname = SEGNAME_BUN,
.addr = original_vmaddr,
.size = @intCast(total_size),
.offset = @intCast(original_fileoff),
.@"align" = @intFromFloat(@log2(@as(f64, @floatFromInt(blob_alignment)))),
.reloff = 0,
.nreloc = 0,
.flags = macho.S_REGULAR | macho.S_ATTR_NO_DEAD_STRIP,
.reserved1 = 0,
.reserved2 = 0,
.reserved3 = 0,
};
const entry_ptr: [*]u8 = @constCast(entry.data.ptr);
const segment_command_ptr: *align(1) macho.segment_command_64 = @ptrCast(@alignCast(entry_ptr));
segment_command_ptr.* = self.segment;
sect.* = self.section;
}
}
}
} else if (strings.eqlComptime(command.segName(), SEG_LINKEDIT)) {
linkedit_seg_idx = @intFromPtr(entry.data.ptr) - @intFromPtr(self.data.items.ptr);
}
},
.CODE_SIGNATURE => {
code_sign_cmd_idx = @intFromPtr(entry.data.ptr) - @intFromPtr(self.data.items.ptr);
},
else => {},
}
}
if (!found_bun) {
return error.InvalidObject;
}
// Calculate how much larger/smaller the section will be compared to its current size
const size_diff = @as(i64, @intCast(aligned_size)) - @as(i64, @intCast(original_segsize));
try self.data.ensureUnusedCapacity(@intCast(size_diff));
const code_sign_cmd: ?*align(1) macho.linkedit_data_command =
if (code_sign_cmd_idx) |idx|
@as(*align(1) macho.linkedit_data_command, @ptrCast(@constCast(@alignCast(&self.data.items[idx]))))
else
null;
const linkedit_seg: *align(1) macho.segment_command_64 =
if (linkedit_seg_idx) |idx|
@as(*align(1) macho.segment_command_64, @ptrCast(@constCast(@alignCast(&self.data.items[idx]))))
else
return error.MissingLinkeditSegment;
// Handle code signature specially
var sig_data: ?[]u8 = null;
var sig_size: usize = 0;
defer if (sig_data) |sd| self.allocator.free(sd);
const prev_data_slice = self.data.items[original_fileoff..];
self.data.items.len += @as(usize, @intCast(size_diff));
// Binary is:
// [header][...data before __BUN][__BUN][...data after __BUN]
// We need to shift [...data after __BUN] forward by size_diff bytes.
const after_bun_slice = self.data.items[original_data_end + @as(usize, @intCast(size_diff)) ..];
const prev_after_bun_slice = prev_data_slice[original_segsize..];
bun.C.move(after_bun_slice, prev_after_bun_slice);
// Now we copy the u32 size header
std.mem.writeInt(u32, self.data.items[original_fileoff..][0..4], @intCast(data.len), .little);
// Now we copy the data itself
@memcpy(self.data.items[original_fileoff + 4 ..][0..data.len], data);
// Lastly, we zero any of the padding that was added
const padding_bytes = self.data.items[original_fileoff..][data.len + 4 .. aligned_size];
@memset(padding_bytes, 0);
if (code_sign_cmd) |cs| {
sig_size = cs.datasize;
// Save existing signature if present
sig_data = try self.allocator.alloc(u8, sig_size);
@memcpy(sig_data.?, self.data.items[cs.dataoff..][0..sig_size]);
}
// Only update offsets if the size actually changed
if (size_diff != 0) {
linkedit_seg.fileoff += @as(usize, @intCast(size_diff));
try self.updateLoadCommandOffsets(original_fileoff, @intCast(size_diff), linkedit_seg.fileoff, linkedit_seg.filesize);
}
if (code_sign_cmd) |cs| {
// Calculate new end of LINKEDIT excluding signature
var new_linkedit_end = linkedit_seg.fileoff + linkedit_seg.filesize;
if (sig_size > 0) {
new_linkedit_end -= sig_size;
}
// Place signature at new end
cs.dataoff = @intCast(new_linkedit_end);
}
try self.validateSegments();
}
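// Descriptive note (not part of the commit): Shifter moves the file offsets stored in load
// commands forward by `amount`, but only for offsets that fall between `start` and the end of
// __LINKEDIT; a zero offset is left alone, and anything outside that range or overflowing is
// reported as an error rather than silently corrupted.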
const Shifter = struct {
start: u64,
amount: u64,
linkedit_fileoff: u64,
linkedit_filesize: u64,
fn do(value: u64, amount: u64, range_min: u64, range_max: u64) !u64 {
if (value == 0) return 0;
if (value < range_min) return error.OffsetOutOfRange;
if (value > range_max) return error.OffsetOutOfRange;
// Check for overflow
if (value > std.math.maxInt(u64) - amount) {
return error.OffsetOverflow;
}
return value + amount;
}
pub fn shift(this: *const Shifter, value: anytype, comptime fields: []const []const u8) !void {
inline for (fields) |field| {
@field(value, field) = @intCast(try do(@field(value, field), this.amount, this.start, this.linkedit_fileoff + this.linkedit_filesize));
}
}
};
// Helper function to update load command offsets when resizing an existing section
fn updateLoadCommandOffsets(self: *MachoFile, previous_fileoff: u64, size_diff: u64, new_linkedit_fileoff: u64, new_linkedit_filesize: u64) !void {
// Validate inputs
if (new_linkedit_fileoff < previous_fileoff) {
return error.InvalidLinkeditOffset;
}
const PAGE_SIZE: u64 = 1 << 12;
// Ensure all offsets are page-aligned
const aligned_previous = alignSize(previous_fileoff, PAGE_SIZE);
const aligned_linkedit = alignSize(new_linkedit_fileoff, PAGE_SIZE);
var iter = self.iterator();
// Create shifter with validated parameters
const shifter = Shifter{
.start = aligned_previous,
.amount = size_diff,
.linkedit_fileoff = aligned_linkedit,
.linkedit_filesize = new_linkedit_filesize,
};
while (iter.next()) |entry| {
const cmd = entry.hdr;
const cmd_ptr: [*]u8 = @constCast(entry.data.ptr);
switch (cmd.cmd) {
.SYMTAB => {
const symtab: *align(1) macho.symtab_command = @ptrCast(@alignCast(cmd_ptr));
try shifter.shift(symtab, &.{
"symoff",
"stroff",
});
},
.DYSYMTAB => {
const dysymtab: *align(1) macho.dysymtab_command = @ptrCast(@alignCast(cmd_ptr));
try shifter.shift(dysymtab, &.{
"tocoff",
"modtaboff",
"extrefsymoff",
"indirectsymoff",
"extreloff",
"locreloff",
});
},
.DYLD_CHAINED_FIXUPS,
.CODE_SIGNATURE,
.FUNCTION_STARTS,
.DATA_IN_CODE,
.DYLIB_CODE_SIGN_DRS,
.LINKER_OPTIMIZATION_HINT,
.DYLD_EXPORTS_TRIE,
=> {
const linkedit_cmd: *align(1) macho.linkedit_data_command = @ptrCast(@alignCast(cmd_ptr));
try shifter.shift(linkedit_cmd, &.{"dataoff"});
// Special handling for code signature
if (cmd.cmd == .CODE_SIGNATURE) {
// Ensure code signature is at the end of LINKEDIT
linkedit_cmd.dataoff = @intCast(new_linkedit_fileoff + new_linkedit_filesize - linkedit_cmd.datasize);
}
},
.DYLD_INFO, .DYLD_INFO_ONLY => {
const dyld_info: *align(1) macho.dyld_info_command = @ptrCast(@alignCast(cmd_ptr));
try shifter.shift(dyld_info, &.{
"rebase_off",
"bind_off",
"weak_bind_off",
"lazy_bind_off",
"export_off",
});
},
else => {},
}
}
}
pub fn iterator(self: *const MachoFile) macho.LoadCommandIterator {
return .{
.buffer = self.data.items[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
.ncmds = self.header.ncmds,
};
}
pub fn build(self: *MachoFile, writer: anytype) !void {
try writer.writeAll(self.data.items);
}
fn validateSegments(self: *MachoFile) !void {
var iter = self.iterator();
var prev_end: u64 = 0;
while (iter.next()) |entry| {
const cmd = entry.hdr;
if (cmd.cmd == .SEGMENT_64) {
const seg = entry.cast(macho.segment_command_64).?;
if (seg.fileoff < prev_end) {
return error.OverlappingSegments;
}
prev_end = seg.fileoff + seg.filesize;
}
}
}
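// Descriptive note (not part of the commit): on arm64 macOS a binary whose code signature no
// longer matches its contents will not run, so after editing the __BUN section the file is
// re-signed with an ad-hoc signature (unless BUN_NO_CODESIGN_MACHO_BINARY is set); other
// architectures are written out without re-signing.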
pub fn buildAndSign(self: *MachoFile, writer: anytype) !void {
if (self.header.cputype == macho.CPU_TYPE_ARM64 and !bun.getRuntimeFeatureFlag("BUN_NO_CODESIGN_MACHO_BINARY")) {
var data = std.ArrayList(u8).init(self.allocator);
defer data.deinit();
try self.build(data.writer());
var signer = try MachoSigner.init(self.allocator, data.items);
defer signer.deinit();
try signer.sign(writer);
} else {
try self.build(writer);
}
}
const MachoSigner = struct {
data: std.ArrayList(u8),
sig_off: usize,
sig_sz: usize,
cs_cmd_off: usize,
linkedit_off: usize,
linkedit_seg: macho.segment_command_64,
text_seg: macho.segment_command_64,
allocator: Allocator,
pub fn init(allocator: Allocator, obj: []const u8) !*MachoSigner {
var self = try allocator.create(MachoSigner);
errdefer allocator.destroy(self);
const header = @as(*align(1) const macho.mach_header_64, @ptrCast(obj.ptr)).*;
const header_size = @sizeOf(macho.mach_header_64);
var sig_off: usize = 0;
var sig_sz: usize = 0;
var cs_cmd_off: usize = 0;
var linkedit_off: usize = 0;
var text_seg = std.mem.zeroes(macho.segment_command_64);
var linkedit_seg = std.mem.zeroes(macho.segment_command_64);
var it = macho.LoadCommandIterator{
.ncmds = header.ncmds,
.buffer = obj[header_size..][0..header.sizeofcmds],
};
// First pass: find segments to establish bounds
while (it.next()) |cmd| {
if (cmd.cmd() == .SEGMENT_64) {
const seg = cmd.cast(macho.segment_command_64).?;
// Store segment info
if (strings.eqlComptime(seg.segName(), SEG_LINKEDIT)) {
linkedit_seg = seg;
linkedit_off = @intFromPtr(cmd.data.ptr) - @intFromPtr(obj.ptr);
// Validate linkedit is after text
if (linkedit_seg.fileoff < text_seg.fileoff + text_seg.filesize) {
return error.InvalidLinkeditOffset;
}
} else if (strings.eqlComptime(seg.segName(), "__TEXT")) {
text_seg = seg;
}
}
}
// Reset iterator
it = macho.LoadCommandIterator{
.ncmds = header.ncmds,
.buffer = obj[header_size..][0..header.sizeofcmds],
};
// Second pass: find code signature
while (it.next()) |cmd| {
switch (cmd.cmd()) {
.CODE_SIGNATURE => {
const cs = cmd.cast(macho.linkedit_data_command).?;
sig_off = cs.dataoff;
sig_sz = cs.datasize;
cs_cmd_off = @intFromPtr(cmd.data.ptr) - @intFromPtr(obj.ptr);
},
else => {},
}
}
if (linkedit_off == 0 or sig_off == 0) {
return error.MissingRequiredSegment;
}
self.* = .{
.data = try std.ArrayList(u8).initCapacity(allocator, obj.len),
.sig_off = sig_off,
.sig_sz = sig_sz,
.cs_cmd_off = cs_cmd_off,
.linkedit_off = linkedit_off,
.linkedit_seg = linkedit_seg,
.text_seg = text_seg,
.allocator = allocator,
};
try self.data.appendSlice(obj);
return self;
}
pub fn deinit(self: *MachoSigner) void {
self.data.deinit();
self.allocator.destroy(self);
}
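// Descriptive note (not part of the commit): sign() builds a minimal ad-hoc signature at the
// page-aligned signature offset: a SuperBlob holding a single CodeDirectory whose code slots
// are SHA-256 hashes of every 4 KiB page before the signature. Code-signing structures are
// big-endian on disk, hence the @byteSwap calls (written assuming a little-endian host).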
pub fn sign(self: *MachoSigner, writer: anytype) !void {
const PAGE_SIZE: usize = 1 << 12;
// Calculate total binary pages before signature
const total_pages = (self.sig_off + PAGE_SIZE - 1) / PAGE_SIZE;
const aligned_sig_off = total_pages * PAGE_SIZE;
// Calculate base signature structure sizes
const id = "a.out\x00";
const super_blob_header_size = @sizeOf(SuperBlob);
const blob_index_size = @sizeOf(BlobIndex);
const code_dir_header_size = @sizeOf(CodeDirectory);
const id_offset = code_dir_header_size;
const hash_offset = id_offset + id.len;
// Calculate hash sizes
const hashes_size = total_pages * 32; // SHA256 = 32 bytes
const code_dir_length = hash_offset + hashes_size;
// Calculate total signature size
const sig_structure_size = super_blob_header_size + blob_index_size + code_dir_length;
const total_sig_size = alignSize(sig_structure_size, PAGE_SIZE);
// Setup SuperBlob
var super_blob = SuperBlob{
.magic = @byteSwap(CSMAGIC_EMBEDDED_SIGNATURE),
.length = @byteSwap(@as(u32, @truncate(sig_structure_size))),
.count = @byteSwap(@as(u32, 1)),
};
// Setup BlobIndex
var blob_index = BlobIndex{
.type = @byteSwap(CSSLOT_CODEDIRECTORY),
.offset = @byteSwap(@as(u32, super_blob_header_size + blob_index_size)),
};
// Setup CodeDirectory
var code_dir = std.mem.zeroes(CodeDirectory);
code_dir.magic = @byteSwap(CSMAGIC_CODEDIRECTORY);
code_dir.length = @byteSwap(@as(u32, @truncate(code_dir_length)));
code_dir.version = @byteSwap(@as(u32, 0x20400));
code_dir.flags = @byteSwap(@as(u32, 0x20002));
code_dir.hashOffset = @byteSwap(@as(u32, @truncate(hash_offset)));
code_dir.identOffset = @byteSwap(@as(u32, @truncate(id_offset)));
code_dir.nSpecialSlots = 0;
code_dir.nCodeSlots = @byteSwap(@as(u32, @truncate(total_pages)));
code_dir.codeLimit = @byteSwap(@as(u32, @truncate(aligned_sig_off)));
code_dir.hashSize = 32;
code_dir.hashType = SEC_CODE_SIGNATURE_HASH_SHA256;
code_dir.pageSize = 12; // log2(4096)
// Get text segment info
const text_base = alignSize(self.text_seg.fileoff, PAGE_SIZE);
const text_limit = alignSize(self.text_seg.filesize, PAGE_SIZE);
code_dir.execSegBase = @byteSwap(@as(u64, text_base));
code_dir.execSegLimit = @byteSwap(@as(u64, text_limit));
code_dir.execSegFlags = @byteSwap(CS_EXECSEG_MAIN_BINARY);
// Ensure space for signature
try self.data.resize(aligned_sig_off + total_sig_size);
self.data.items.len = aligned_sig_off;
@memset(self.data.unusedCapacitySlice(), 0);
// Position writer at signature offset
var sig_writer = self.data.writer();
// Write signature components
try sig_writer.writeAll(mem.asBytes(&super_blob));
try sig_writer.writeAll(mem.asBytes(&blob_index));
try sig_writer.writeAll(mem.asBytes(&code_dir));
try sig_writer.writeAll(id);
// Hash and write pages
var remaining = self.data.items[0..aligned_sig_off];
while (remaining.len >= PAGE_SIZE) {
const page = remaining[0..PAGE_SIZE];
var digest: bun.sha.SHA256.Digest = undefined;
bun.sha.SHA256.hash(page, &digest, null);
try sig_writer.writeAll(&digest);
remaining = remaining[PAGE_SIZE..];
}
if (remaining.len > 0) {
var last_page = [_]u8{0} ** PAGE_SIZE;
@memcpy(last_page[0..remaining.len], remaining);
var digest: bun.sha.SHA256.Digest = undefined;
bun.sha.SHA256.hash(&last_page, &digest, null);
try sig_writer.writeAll(&digest);
}
// Write final binary
try writer.writeAll(self.data.items);
}
};
};
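// Descriptive note (not part of the commit): alignSize rounds `size` up to the next multiple of
// `base`; alignVmsize does the same but never reports a segment smaller than 0x4000 bytes.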
fn alignSize(size: u64, base: u64) u64 {
const over = size % base;
return if (over == 0) size else size + (base - over);
}
fn alignVmsize(size: u64, page_size: u64) u64 {
return alignSize(if (size > 0x4000) size else 0x4000, page_size);
}
const SEG_LINKEDIT = "__LINKEDIT";
pub const utils = struct {
pub fn isElf(data: []const u8) bool {
if (data.len < 4) return false;
return mem.readInt(u32, data[0..4], .big) == 0x7f454c46;
}
pub fn isMacho(data: []const u8) bool {
if (data.len < 4) return false;
return mem.readInt(u32, data[0..4], .little) == macho.MH_MAGIC_64;
}
};
const CSMAGIC_CODEDIRECTORY: u32 = 0xfade0c02;
const CSMAGIC_EMBEDDED_SIGNATURE: u32 = 0xfade0cc0;
const CSSLOT_CODEDIRECTORY: u32 = 0;
const SEC_CODE_SIGNATURE_HASH_SHA256: u8 = 2;
const CS_EXECSEG_MAIN_BINARY: u64 = 0x1;
const SuperBlob = std.macho.SuperBlob;
const Blob = std.macho.GenericBlob;
const CodeDirectory = std.macho.CodeDirectory;
const BlobIndex = std.macho.BlobIndex;

View File

@@ -56,7 +56,7 @@ fn NewEVP(comptime digest_size: comptime_int, comptime MDName: []const u8) type
return this;
}
pub fn hash(bytes: []const u8, out: *Digest, engine: *BoringSSL.ENGINE) void {
pub fn hash(bytes: []const u8, out: *Digest, engine: ?*BoringSSL.ENGINE) void {
const md = @field(BoringSSL, MDName)();
bun.assert(BoringSSL.EVP_Digest(bytes.ptr, bytes.len, out, null, md, engine) == 1);