// bun.sh/src/linux_memfd_allocator.zig

const bun = @import("root").bun;
const std = @import("std");

/// When cloning large amounts of data, potentially multiple times, we can
/// leverage copy-on-write memory to avoid actually copying the data. To do that
/// on Linux, we need to use a memfd, which is a Linux-specific feature.
///
/// The steps are roughly:
///
/// 1. Create a memfd
/// 2. Write the data to the memfd
/// 3. Map the memfd into memory
///
/// Then, to clone the data later, we can just call `mmap` again.
///
/// The big catch is that mmap(), memfd_create(), and write() all have
/// overhead. And while the process often re-uses virtual memory for ordinary
/// allocations, this approach does not reuse virtual memory. So we should only
/// really use this for large blobs of data that we expect to be cloned
/// multiple times, such as a Blob in FormData.
pub const LinuxMemFdAllocator = struct {
    /// The memfd holding the bytes. Closed when the last reference is dropped.
    fd: bun.FileDescriptor = .zero,
    /// Number of outstanding references; `deref` destroys `this` at zero.
    ref_count: std.atomic.Value(u32) = std.atomic.Value(u32).init(0),
    /// Number of bytes written into the memfd by `create`.
    size: usize = 0,

    /// Process-wide counter used only to give each memfd a distinct label.
    var memfd_counter = std.atomic.Value(usize).init(0);

    pub usingnamespace bun.New(LinuxMemFdAllocator);

    /// Take an additional reference.
    pub fn ref(this: *LinuxMemFdAllocator) void {
        _ = this.ref_count.fetchAdd(1, .Monotonic);
    }

    /// Drop one reference. When the last reference is dropped, the memfd is
    /// closed and `this` is destroyed; `this` must not be used afterwards.
    pub fn deref(this: *LinuxMemFdAllocator) void {
        // Release: everything this thread did with the object is ordered
        // before the decrement. Acquire: the thread that performs the final
        // decrement observes those effects before closing and destroying.
        // A Monotonic decrement provides neither guarantee.
        if (this.ref_count.fetchSub(1, .AcqRel) == 1) {
            _ = bun.sys.close(this.fd);
            this.destroy();
        }
    }

    /// Wrap `this` in a `std.mem.Allocator` whose only useful operation is
    /// `free` (see `AllocatorInterface`).
    pub fn allocator(this: *LinuxMemFdAllocator) std.mem.Allocator {
        return .{
            .ptr = this,
            .vtable = AllocatorInterface.VTable,
        };
    }

    /// Recover the `LinuxMemFdAllocator` behind `allocator_`, or `null` when
    /// `allocator_` was not produced by `allocator()` (identified by vtable
    /// pointer equality).
    pub fn from(allocator_: std.mem.Allocator) ?*LinuxMemFdAllocator {
        if (allocator_.vtable == AllocatorInterface.VTable) {
            return @alignCast(@ptrCast(allocator_.ptr));
        }

        return null;
    }

    /// `std.mem.Allocator` vtable implementation. Allocation and resizing
    /// always fail; freeing unmaps the buffer and drops one reference.
    const AllocatorInterface = struct {
        fn alloc(_: *anyopaque, _: usize, _: u8, _: usize) ?[*]u8 {
            // it should perform no allocations or resizes
            return null;
        }

        fn resize(_: *anyopaque, _: []u8, _: u8, _: usize, _: usize) bool {
            return false;
        }

        fn free(
            ptr: *anyopaque,
            buf: []u8,
            _: u8,
            _: usize,
        ) void {
            const this: *LinuxMemFdAllocator = @alignCast(@ptrCast(ptr));
            // Drop the reference only after the mapping has been removed.
            defer this.deref();
            bun.sys.munmap(@alignCast(@ptrCast(buf))).unwrap() catch |err| {
                bun.Output.debugWarn("Failed to munmap memfd: {}", .{err});
            };
        }

        pub const VTable = &std.mem.Allocator.VTable{
            .alloc = &AllocatorInterface.alloc,
            .resize = &resize,
            .free = &free,
        };
    };

    /// Map the memfd into memory and describe the mapping as a `ByteStore`
    /// whose allocator is `this.allocator()`.
    ///
    /// - `len`: logical length recorded in the returned store.
    /// - `offset`: file offset passed to `mmap`.
    /// - `flags`: extra `mmap` flags OR-ed with `MAP.SHARED`.
    ///
    /// This does not touch the reference count; freeing the returned store
    /// through its allocator drops one reference.
    pub fn alloc(this: *LinuxMemFdAllocator, len: usize, offset: usize, flags: u32) bun.JSC.Maybe(bun.JSC.WebCore.Blob.ByteStore) {
        // Round the requested length up to a whole number of pages.
        const size = std.mem.alignForward(usize, len, std.mem.page_size);

        // The mapping length is capped at the number of bytes in the memfd.
        // NOTE(review): `offset` is not subtracted from the cap — confirm
        // callers only pass offsets for which this is acceptable.
        switch (bun.sys.mmap(
            null,
            @min(size, this.size),
            std.os.PROT.READ | std.os.PROT.WRITE,
            std.os.MAP.SHARED | flags,
            this.fd,
            offset,
        )) {
            .result => |slice| {
                return .{
                    .result = bun.JSC.WebCore.Blob.ByteStore{
                        .cap = @truncate(slice.len),
                        .ptr = slice.ptr,
                        .len = @truncate(len),
                        .allocator = this.allocator(),
                    },
                };
            },
            .err => |errno| {
                return .{ .err = errno };
            },
        }
    }

    /// Whether `bytes` is large enough to be worth backing with a memfd.
    /// Always `false` off Linux; otherwise the threshold is 1 MiB in smol
    /// mode and 8 MiB by default.
    pub fn shouldUse(bytes: []const u8) bool {
        if (comptime !bun.Environment.isLinux) {
            return false;
        }

        if (bun.JSC.VirtualMachine.is_smol_mode) {
            return bytes.len >= 1024 * 1024 * 1;
        }

        // This is a net 2x - 4x slowdown to new Blob([huge])
        // so we must be careful
        return bytes.len >= 1024 * 1024 * 8;
    }

    /// Create a memfd, copy `bytes` into it, and map it into memory.
    ///
    /// On success the returned `ByteStore` holds the single initial reference
    /// to a newly created `LinuxMemFdAllocator`. On failure the memfd is
    /// closed and the error is returned. Linux only.
    pub fn create(bytes: []const u8) bun.JSC.Maybe(bun.JSC.WebCore.Blob.ByteStore) {
        if (comptime !bun.Environment.isLinux) {
            unreachable;
        }

        const rc = brk: {
            var label_buf: [128]u8 = undefined;
            const label = std.fmt.bufPrintZ(&label_buf, "memfd-num-{d}", .{memfd_counter.fetchAdd(1, .Monotonic)}) catch "";

            // Using huge pages was slower.
            const code = std.os.linux.memfd_create(label.ptr, std.os.linux.MFD.CLOEXEC | 0);

            bun.sys.syslog("memfd_create({s}) = {d}", .{ label, code });
            break :brk code;
        };

        switch (std.os.linux.getErrno(rc)) {
            .SUCCESS => {},
            else => |errno| {
                bun.sys.syslog("Failed to create memfd: {s}", .{@tagName(errno)});
                return .{ .err = bun.sys.Error.fromCode(errno, .open) };
            },
        }

        const fd = bun.toFD(rc);

        if (bytes.len > 0)
            // Hint at the size of the file (best effort; the result is ignored)
            _ = bun.sys.ftruncate(fd, @intCast(bytes.len));

        // Dump all the bytes in there
        var written: isize = 0;

        var remain = bytes;
        while (remain.len > 0) {
            switch (bun.sys.pwrite(fd, remain, written)) {
                .err => |err| {
                    // EAGAIN: retry the same write.
                    if (err.getErrno() == .AGAIN) {
                        continue;
                    }

                    bun.Output.debugWarn("Failed to write to memfd: {}", .{err});
                    _ = bun.sys.close(fd);
                    return .{ .err = err };
                },
                .result => |result| {
                    // A zero-length write would loop forever; report it as
                    // an out-of-memory failure instead.
                    if (result == 0) {
                        bun.Output.debugWarn("Failed to write to memfd: EOF", .{});
                        _ = bun.sys.close(fd);
                        return .{ .err = bun.sys.Error.fromCode(.NOMEM, .write) };
                    }
                    written += @intCast(result);
                    remain = remain[result..];
                },
            }
        }

        // From here on the allocator owns `fd`; the initial reference is the
        // one handed to the ByteStore returned below.
        const linux_memfd_allocator = LinuxMemFdAllocator.new(.{
            .fd = fd,
            .ref_count = std.atomic.Value(u32).init(1),
            .size = bytes.len,
        });

        switch (linux_memfd_allocator.alloc(bytes.len, 0, 0)) {
            .result => |res| {
                return .{ .result = res };
            },
            .err => |err| {
                // Dropping the only reference closes the fd and frees the allocator.
                linux_memfd_allocator.deref();
                return .{ .err = err };
            },
        }

        unreachable;
    }
};