Files
bun.sh/src/copy_file.zig
taylor.fish 41b1efe12c Rename disabled parameter in Output.scoped (#21769)
It's very confusing.

(For internal tracking: fixes STAB-977)
2025-08-11 20:19:34 -07:00

309 lines
12 KiB
Zig

// Transfer all the data between two file descriptors in the most efficient way.
// The copy starts at offset 0, the initial offsets are preserved.
// No metadata is transferred over.
/// Error set listing copy-specific failures, merged with the standard
/// `posix.PReadError`, `posix.PWriteError` and `posix.UnexpectedError` sets.
///
/// NOTE(review): nothing visible in this file references this set — the copy
/// functions below return `bun.sys.Maybe(...)` instead. Confirm whether external
/// callers still depend on it.
pub const CopyFileRangeError = error{
FileTooBig,
InputOutput,
/// `in` is not open for reading; or `out` is not open for writing;
/// or the `O.APPEND` flag is set for `out`.
FilesOpenedWithWrongFlags,
IsDir,
OutOfMemory,
NoSpaceLeft,
Unseekable,
PermissionDenied,
FileBusy,
} || posix.PReadError || posix.PWriteError || posix.UnexpectedError;
/// Type of the `in` / `out` arguments of the copy entry points:
/// `bun.OSPathSliceZ` on Windows (its `.ptr` is handed to `CopyFileW`),
/// `bun.FD` on every other platform (converted with `.native()` before syscalls).
const InputType = if (Environment.isWindows) bun.OSPathSliceZ else bun.FD;
/// In a `bun install` with prisma, this reduces the system call count from ~18,000 to ~12,000
///
/// The intended order here is:
/// 1. ioctl_ficlone
/// 2. copy_file_range
/// 3. sendfile()
/// 4. read() write() loop
///
/// copy_file_range is supposed to do all the fast ways. It might be unnecessary
/// to do ioctl_ficlone.
///
/// sendfile() is a good fallback to avoid the read-write loops. sendfile() improves
/// performance by moving the copying step to the kernel.
///
/// On Linux, sendfile() can work between any two file descriptors which can be mmap'd.
/// This means that it cannot work with TTYs and some special devices,
/// but it can work with two ordinary files.
///
/// On macOS and other platforms, sendfile() only works when one of the ends is a socket,
/// and in general on macOS, it doesn't seem to have much performance impact.
///
/// `LinuxCopyFileState` remembers which of the strategies above already failed, so that
/// `copyFileWithState` and `copyFileRange` skip them on later calls that share the state.
const LinuxCopyFileState = packed struct(u8) {
/// This is the most important flag for reducing the system call count.
/// When copying files from one folder to another, if we see EXDEV once
/// there's a very good chance we will see it for every file thereafter in that folder.
/// So we should remember whether or not we saw it and keep the state for roughly one directory tree.
///
/// Set when ioctl_ficlone, copy_file_range or sendfile reports XDEV; while set,
/// ioctl_ficlone and copy_file_range are no longer attempted.
has_seen_exdev: bool = false,
/// Set when ioctl_ficlone fails with anything other than XDEV or INTR.
has_ioctl_ficlone_failed: bool = false,
/// Set when copy_file_range fails with anything other than INTR.
has_copy_file_range_failed: bool = false,
/// Set when sendfile fails with anything other than INTR.
has_sendfile_failed: bool = false,
/// Unused bits that pad the four flags out to the `u8` backing integer.
_: u4 = 0,
};
/// Field-less stand-in used as the copy state on platforms other than Linux.
const EmptyCopyFileState = struct {};
/// State threaded through `copyFileWithState`: `LinuxCopyFileState` on Linux,
/// an empty struct everywhere else.
pub const CopyFileState = if (Environment.isLinux) LinuxCopyFileState else EmptyCopyFileState;
/// Result of the whole-file copy entry points: `bun.sys.Maybe(void)`.
const CopyFileReturnType = bun.sys.Maybe(void);
/// Copies the contents of `in` to `out`, picking the mechanism per platform.
///
/// - macOS: `fcopyfile` with the `DATA` flag. Only `OPNOTSUPP` falls through to the
///   read/write loop at the end of this function; any other errno is returned.
/// - Linux: `ioctl_ficlone` (when `can_use_ioctl_ficlone()` and `copy_file_state`
///   allow it), then repeated `copyFileRange` calls until one reports 0 bytes.
/// - Windows: `CopyFileW` on the two paths.
/// - Otherwise: `copyFileReadWriteLoop` until it reports 0 bytes.
///
/// On Linux, `copy_file_state` is read and updated so that mechanisms which already
/// failed are skipped on later calls sharing the same state.
///
/// Returns `CopyFileReturnType.success`, or the first error reported by the
/// mechanism in use.
pub fn copyFileWithState(in: InputType, out: InputType, copy_file_state: *CopyFileState) CopyFileReturnType {
    if (comptime Environment.isMac) {
        const rc = posix.system.fcopyfile(in.native(), out.native(), null, posix.system.COPYFILE{ .DATA = true });
        switch (posix.errno(rc)) {
            .SUCCESS => return CopyFileReturnType.success,
            // The source file is not a directory, symbolic link, or regular file.
            // Try with the fallback path before giving up.
            .OPNOTSUPP => {},
            else => return CopyFileReturnType.errnoSys(rc, .copyfile).?,
        }
    }

    if (comptime Environment.isLinux) {
        if (can_use_ioctl_ficlone() and !copy_file_state.has_seen_exdev and !copy_file_state.has_ioctl_ficlone_failed) {
            // We only check once if the ioctl is supported, and cache the result.
            // EXT4 does not support FICLONE.
            const rc = bun.linux.ioctl_ficlone(out, in);
            // the ordering is flipped but it is consistent with other system calls.
            bun.sys.syslog("ioctl_ficlone({}, {}) = {d}", .{ in, out, rc });
            switch (bun.sys.getErrno(rc)) {
                .SUCCESS => return CopyFileReturnType.success,
                .XDEV => {
                    copy_file_state.has_seen_exdev = true;
                },
                // Don't worry about EINTR here.
                .INTR => {},
                .ACCES, .BADF, .INVAL, .OPNOTSUPP, .NOSYS, .PERM => {
                    debug("ioctl_ficlonerange is NOT supported", .{});
                    // Disables the ioctl process-wide, not just for this state.
                    can_use_ioctl_ficlone_.store(-1, .monotonic);
                    copy_file_state.has_ioctl_ficlone_failed = true;
                },
                else => {
                    // Failed for some other reason
                    copy_file_state.has_ioctl_ficlone_failed = true;
                },
            }
        }

        // Try copy_file_range first as that works at the FS level and is the
        // most efficient method (if available). `copyFileRange` falls back to
        // sendfile and then to a read/write loop on its own.
        while (true) {
            // The kernel checks the u64 value `offset+count` for overflow, use
            // a 32 bit value so that the syscall won't return EINVAL except for
            // impossibly large files (> 2^64-1 - 2^32-1).
            const amt = switch (copyFileRange(in.native(), out.native(), math.maxInt(i32) - 1, 0, copy_file_state)) {
                .result => |a| a,
                .err => |err| return .{ .err = err },
            };
            // Terminate when no data was copied
            if (amt == 0) break;
        }
        return CopyFileReturnType.success;
    }

    if (comptime Environment.isWindows) {
        if (CopyFileReturnType.errnoSys(bun.windows.CopyFileW(in.ptr, out.ptr, 0), .copyfile)) |err| {
            return err;
        }
        return CopyFileReturnType.success;
    }

    // Generic fallback: only reached when none of the branches above returned.
    while (true) {
        switch (copyFileReadWriteLoop(in.native(), out.native(), math.maxInt(i32) - 1)) {
            .err => |err| return .{ .err = err },
            .result => |amt| {
                if (amt == 0) break;
            },
        }
    }
    return CopyFileReturnType.success;
}
/// Copies `in` to `out` using a fresh, default-initialized `CopyFileState`.
/// Use `copyFileWithState` directly to share failure state across several copies.
pub fn copyFile(in: InputType, out: InputType) CopyFileReturnType {
    var one_shot_state = CopyFileState{};
    return copyFileWithState(in, out, &one_shot_state);
}
/// Cached answer for `canUseCopyFileRangeSyscall`:
/// 0 = not determined yet, 1 = usable, -1 = disabled or unsupported.
var can_use_copy_file_range = std.atomic.Value(i32).init(0);
/// Marks `copy_file_range` as unusable (-1) for the rest of the process.
/// Does nothing on platforms other than Linux.
pub inline fn disableCopyFileRangeSyscall() void {
    if (comptime Environment.isLinux) {
        can_use_copy_file_range.store(-1, .monotonic);
    }
}
/// Reports whether `copy_file_range` should be attempted.
///
/// The answer is cached in `can_use_copy_file_range`. On the first call it is
/// `false` when the `BUN_CONFIG_DISABLE_COPY_FILE_RANGE` environment variable is
/// set; otherwise it is `true` only when the kernel version is at least 4.5.
pub fn canUseCopyFileRangeSyscall() bool {
    const cached = can_use_copy_file_range.load(.monotonic);
    // Anything non-zero is an already-computed verdict.
    if (cached != 0) return cached == 1;

    // This flag mostly exists to make other code more easily testable.
    if (bun.getenvZ("BUN_CONFIG_DISABLE_COPY_FILE_RANGE") != null) {
        debug("copy_file_range is disabled by BUN_CONFIG_DISABLE_COPY_FILE_RANGE", .{});
        can_use_copy_file_range.store(-1, .monotonic);
        return false;
    }

    const kernel = Platform.kernelVersion();
    const supported = kernel.orderWithoutTag(.{ .major = 4, .minor = 5 }).compare(.gte);
    if (supported) {
        debug("copy_file_range is supported", .{});
    } else {
        debug("copy_file_range is NOT supported", .{});
    }
    const verdict: i32 = if (supported) 1 else -1;
    can_use_copy_file_range.store(verdict, .monotonic);
    return supported;
}
/// Cached answer for `can_use_ioctl_ficlone`:
/// 0 = not determined yet, 1 = usable, -1 = disabled or unsupported.
/// Also set to -1 by `copyFileWithState` when the ioctl fails with an errno it
/// treats as "not supported".
pub var can_use_ioctl_ficlone_ = std.atomic.Value(i32).init(0);
/// Marks `ioctl_ficlone` as unusable (-1) for the rest of the process.
/// Does nothing on platforms other than Linux.
pub inline fn disable_ioctl_ficlone() void {
    if (comptime Environment.isLinux) {
        can_use_ioctl_ficlone_.store(-1, .monotonic);
    }
}
/// Reports whether `ioctl_ficlone` should be attempted.
///
/// The answer is cached in `can_use_ioctl_ficlone_`. On the first call it is
/// `false` when the `BUN_CONFIG_DISABLE_ioctl_ficlonerange` environment variable
/// is set; otherwise it is `true` only when the kernel version is at least 4.5.
pub fn can_use_ioctl_ficlone() bool {
    const cached = can_use_ioctl_ficlone_.load(.monotonic);
    // Anything non-zero is an already-computed verdict.
    if (cached != 0) return cached == 1;

    // This flag mostly exists to make other code more easily testable.
    if (bun.getenvZ("BUN_CONFIG_DISABLE_ioctl_ficlonerange") != null) {
        debug("ioctl_ficlonerange is disabled by BUN_CONFIG_DISABLE_ioctl_ficlonerange", .{});
        can_use_ioctl_ficlone_.store(-1, .monotonic);
        return false;
    }

    const kernel = Platform.kernelVersion();
    const supported = kernel.orderWithoutTag(.{ .major = 4, .minor = 5 }).compare(.gte);
    if (supported) {
        debug("ioctl_ficlonerange is supported", .{});
    } else {
        debug("ioctl_ficlonerange is NOT supported", .{});
    }
    const verdict: i32 = if (supported) 1 else -1;
    can_use_ioctl_ficlone_.store(verdict, .monotonic);
    return supported;
}
/// Moves up to `len` bytes from `in` to `out` and reports how many bytes moved.
///
/// Mechanisms are tried in this order:
/// 1. `copy_file_range`, when `canUseCopyFileRangeSyscall()` allows it and
///    `copy_file_state` has recorded neither EXDEV nor an earlier failure;
/// 2. `sendfile`, unless `copy_file_state.has_sendfile_failed` is set;
/// 3. `copyFileReadWriteLoop`.
///
/// Both syscalls are called with `null` offsets and are retried on EINTR. Any
/// other error marks that mechanism as failed in `copy_file_state` and falls
/// through to the next one. `flags` is forwarded to `copy_file_range` only.
pub fn copyFileRange(in: fd_t, out: fd_t, len: usize, flags: u32, copy_file_state: *CopyFileState) Maybe(usize) {
    const may_try_copy_file_range = canUseCopyFileRangeSyscall() and
        !copy_file_state.has_seen_exdev and
        !copy_file_state.has_copy_file_range_failed;

    if (may_try_copy_file_range) {
        while (true) {
            const rc = std.os.linux.copy_file_range(in, null, out, null, len, flags);
            bun.sys.syslog("copy_file_range({d}, {d}, {d}) = {d}", .{ in, out, len, rc });
            const errno = bun.sys.getErrno(rc);
            switch (errno) {
                .SUCCESS => return .{ .result = @intCast(rc) },
                .INTR => continue,
                else => {},
            }

            // Every remaining errno turns copy_file_range off for this state.
            copy_file_state.has_copy_file_range_failed = true;
            switch (errno) {
                // support for cross-filesystem copy added in Linux 5.3
                // and even then, it is frequently not supported.
                .XDEV => {
                    copy_file_state.has_seen_exdev = true;
                },
                // syscall added in Linux 4.5, use fallback
                .OPNOTSUPP, .NOSYS => {
                    debug("copy_file_range is NOT supported", .{});
                    can_use_copy_file_range.store(-1, .monotonic);
                },
                // INVAL (these may not be regular files) or some other
                // failure: nothing more to record, just fall back.
                else => {},
            }
            break;
        }
    }

    // The loop ends by itself once a failure sets `has_sendfile_failed`.
    while (!copy_file_state.has_sendfile_failed) {
        const rc = std.os.linux.sendfile(@intCast(out), @intCast(in), null, len);
        bun.sys.syslog("sendfile({d}, {d}, {d}) = {d}", .{ in, out, len, rc });
        const errno = bun.sys.getErrno(rc);
        switch (errno) {
            .SUCCESS => return .{ .result = @intCast(rc) },
            .INTR => continue,
            // INVAL (may not be regular files), OPNOTSUPP / NOSYS (they might
            // not support it), XDEV (this shouldn't happen?) or anything else:
            // give up on sendfile and fall back to the read-write loop.
            else => {
                if (errno == .XDEV) copy_file_state.has_seen_exdev = true;
                copy_file_state.has_sendfile_failed = true;
            },
        }
    }

    return copyFileReadWriteLoop(in, out, len);
}
/// Performs one read from `in` (at most `min(len, 32 KiB)` bytes) and writes
/// everything that was read to `out`.
///
/// Returns:
/// - `.result = 0` when the read returned no bytes;
/// - `.result = amt_read` once every byte read has been written;
/// - `.result = amt_written` when a write reports 0 bytes before the chunk is done;
/// - `.err` with the first error reported by `bun.sys.read` or `bun.sys.write`.
///
/// NOTE(review): in the zero-byte-write case the unwritten tail of the chunk is
/// dropped, and a result of 0 is indistinguishable from end of input for callers
/// that loop until 0. Confirm this best-effort behavior is intended.
pub fn copyFileReadWriteLoop(
    in: fd_t,
    out: fd_t,
    len: usize,
) Maybe(usize) {
    // Stack buffer; only the prefix filled by the read below is ever used.
    var buf: [8 * 4096]u8 = undefined;
    const adjusted_count = @min(buf.len, len);

    const amt_read = switch (bun.sys.read(.fromNative(in), buf[0..adjusted_count])) {
        .result => |n| n,
        .err => |err| return .{ .err = err },
    };
    if (amt_read == 0) return .{ .result = 0 };

    // Short writes are possible, so keep writing until the chunk is drained.
    var amt_written: usize = 0;
    while (amt_written < amt_read) {
        const wrote = switch (bun.sys.write(.fromNative(out), buf[amt_written..amt_read])) {
            .result => |n| n,
            .err => |err| return .{ .err = err },
        };
        if (wrote == 0) return .{ .result = amt_written };
        amt_written += wrote;
    }
    return .{ .result = amt_read };
}
const debug = bun.Output.scoped(.copy_file, .hidden);
const bun = @import("bun");
const Environment = bun.Environment;
const Maybe = bun.sys.Maybe;
const Platform = bun.analytics.GenerateHeader.GeneratePlatform;
const std = @import("std");
const math = std.math;
const posix = std.posix;
const fd_t = std.posix.fd_t;