Files
bun.sh/src/copy_file.zig
pfg 05d0475c6c Update to zig 0.15.2 (#24204)
Fixes ENG-21287

Build times, from `bun run build && echo '//' >> src/main.zig && time
bun run build`

|Platform|0.14.1|0.15.2|Speedup|
|-|-|-|-|
|macos debug asan|126.90s|106.27s|1.19x|
|macos debug noasan|60.62s|50.85s|1.19x|
|linux debug asan|292.77s|241.45s|1.21x|
|linux debug noasan|146.58s|130.94s|1.12x|
|linux debug use_llvm=false|n/a|78.27s|1.87x|
|windows debug asan|177.13s|142.55s|1.24x|

Runtime performance:

- next build memory usage may have gone up by 5%. Otherwise seems the
same. Some code with writers may have gotten slower, especially one
instance of a counting writer and a few instances of unbuffered writers
that now have vtable overhead.
- File size reduced by 800kb (from 100.2mb to 99.4mb)

Improvements:

- `@export` hack is no longer needed for watch
- native x86_64 backend for linux builds faster. to use it, set use_llvm
false and no_link_obj false. also set `ASAN_OPTIONS=detect_leaks=0`
otherwise it will spam the output with tens of thousands of lines of
debug info errors. may need to use the zig lldb fork for debugging.
- zig test-obj, which we will be able to use for zig unit tests

Still an issue:

- false 'dependency loop' errors remain in watch mode
- watch mode crashes observed

Follow-up:

- [ ] search `comptime Writer: type` and `comptime W: type` and remove
- [ ] remove format_mode in our zig fork
- [ ] remove deprecated.zig autoFormatLabelFallback
- [ ] remove deprecated.zig autoFormatLabel
- [ ] remove deprecated.BufferedWriter and BufferedReader
- [ ] remove override_no_export_cpp_apis as it is no longer needed
- [ ] css Parser(W) -> Parser, and remove all the comptime writer: type
params
- [ ] remove deprecated writer fully

Files that add lines:

```
649     src/deprecated.zig
167     scripts/pack-codegen-for-zig-team.ts
54      scripts/cleartrace-impl.js
46      scripts/cleartrace.ts
43      src/windows.zig
18      src/fs.zig
17      src/bun.js/ConsoleObject.zig
16      src/output.zig
12      src/bun.js/test/debug.zig
12      src/bun.js/node/node_fs.zig
8       src/env_loader.zig
7       src/css/printer.zig
7       src/cli/init_command.zig
7       src/bun.js/node.zig
6       src/string/escapeRegExp.zig
6       src/install/PnpmMatcher.zig
5       src/bun.js/webcore/Blob.zig
4       src/crash_handler.zig
4       src/bun.zig
3       src/install/lockfile/bun.lock.zig
3       src/cli/update_interactive_command.zig
3       src/cli/pack_command.zig
3       build.zig
2       src/Progress.zig
2       src/install/lockfile/lockfile_json_stringify_for_debugging.zig
2       src/css/small_list.zig
2       src/bun.js/webcore/prompt.zig
1       test/internal/ban-words.test.ts
1       test/internal/ban-limits.json
1       src/watcher/WatcherTrace.zig
1       src/transpiler.zig
1       src/shell/builtin/cp.zig
1       src/js_printer.zig
1       src/io/PipeReader.zig
1       src/install/bin.zig
1       src/css/selectors/selector.zig
1       src/cli/run_command.zig
1       src/bun.js/RuntimeTranspilerStore.zig
1       src/bun.js/bindings/JSRef.zig
1       src/bake/DevServer.zig
```

Files that remove lines:

```
-1      src/test/recover.zig
-1      src/sql/postgres/SocketMonitor.zig
-1      src/sql/mysql/MySQLRequestQueue.zig
-1      src/sourcemap/CodeCoverage.zig
-1      src/css/values/color_js.zig
-1      src/compile_target.zig
-1      src/bundler/linker_context/convertStmtsForChunk.zig
-1      src/bundler/bundle_v2.zig
-1      src/bun.js/webcore/blob/read_file.zig
-1      src/ast/base.zig
-2      src/sql/postgres/protocol/ArrayList.zig
-2      src/shell/builtin/mkdir.zig
-2      src/install/PackageManager/patchPackage.zig
-2      src/install/PackageManager/PackageManagerDirectories.zig
-2      src/fmt.zig
-2      src/css/declaration.zig
-2      src/css/css_parser.zig
-2      src/collections/baby_list.zig
-2      src/bun.js/bindings/ZigStackFrame.zig
-2      src/ast/E.zig
-3      src/StandaloneModuleGraph.zig
-3      src/deps/picohttp.zig
-3      src/deps/libuv.zig
-3      src/btjs.zig
-4      src/threading/Futex.zig
-4      src/shell/builtin/touch.zig
-4      src/meta.zig
-4      src/install/lockfile.zig
-4      src/css/selectors/parser.zig
-5      src/shell/interpreter.zig
-5      src/css/error.zig
-5      src/bun.js/web_worker.zig
-5      src/bun.js.zig
-6      src/cli/test_command.zig
-6      src/bun.js/VirtualMachine.zig
-6      src/bun.js/uuid.zig
-6      src/bun.js/bindings/JSValue.zig
-9      src/bun.js/test/pretty_format.zig
-9      src/bun.js/api/BunObject.zig
-14     src/install/install_binding.zig
-14     src/fd.zig
-14     src/bun.js/node/path.zig
-14     scripts/pack-codegen-for-zig-team.sh
-17     src/bun.js/test/diff_format.zig
```

`git diff --numstat origin/main...HEAD | awk '{ print ($1-$2)"\t"$3 }' |
sort -rn`

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
Co-authored-by: Meghan Denny <meghan@bun.com>
Co-authored-by: tayor.fish <contact@taylor.fish>
2025-11-10 14:38:26 -08:00

309 lines
12 KiB
Zig

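// Transfer all the data between two file descriptors in the most efficient way.
// The copy starts at offset 0, the initial offsets are preserved.
// NOTE(review): the Linux and generic fallback paths in this file pass `null`
// offsets to copy_file_range/sendfile and use plain read/write, so they appear
// to copy from each descriptor's current position and advance it — confirm
// that callers do not rely on the sentence above.
// No metadata is transferred over.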
/// Error set for failures of a descriptor-to-descriptor copy.
/// It merges the copy-specific names listed below with the std
/// `posix.PReadError`, `posix.PWriteError` and `posix.UnexpectedError` sets.
/// NOTE(review): the functions visible in this file report failures through
/// `bun.sys.Maybe(...)` rather than this error set; presumably it is kept for
/// users elsewhere — confirm before changing or removing it.
pub const CopyFileRangeError = error{
FileTooBig,
InputOutput,
/// `in` is not open for reading; or `out` is not open for writing;
/// or the `O.APPEND` flag is set for `out`.
FilesOpenedWithWrongFlags,
IsDir,
OutOfMemory,
NoSpaceLeft,
Unseekable,
PermissionDenied,
FileBusy,
} || posix.PReadError || posix.PWriteError || posix.UnexpectedError;
/// How the source and destination of a copy are identified.
/// On Windows this is `bun.OSPathSliceZ` (the Windows branch of
/// `copyFileWithState` passes `.ptr` of each value to `CopyFileW`);
/// on every other platform it is a `bun.FD`.
const InputType = if (Environment.isWindows) bun.OSPathSliceZ else bun.FD;
/// Remembers which Linux copy mechanisms have already failed, so that a
/// caller copying many files with the same state can skip them.
///
/// In a `bun install` with prisma, this reduces the system call count from ~18,000 to ~12,000
///
/// The intended order here is:
/// 1. ioctl_ficlone
/// 2. copy_file_range
/// 3. sendfile()
/// 4. read() write() loop
///
/// copy_file_range is supposed to do all the fast ways. It might be unnecessary
/// to do ioctl_ficlone.
///
/// sendfile() is a good fallback to avoid the read-write loops. sendfile() improves
/// performance by moving the copying step to the kernel.
///
/// On Linux, sendfile() can work between any two file descriptors which can be mmap'd.
/// This means that it cannot work with TTYs and some special devices
/// But it can work with two ordinary files
///
/// on macOS and other platforms, sendfile() only works when one of the ends is a socket
/// and in general on macOS, it doesn't seem to have much performance impact.
const LinuxCopyFileState = packed struct(u8) {
/// This is the most important flag for reducing the system call count
/// When copying files from one folder to another, if we see EXDEV once
/// there's a very good chance we will see it for every file thereafter in that folder.
/// So we should remember whether or not we saw it and keep the state for roughly one directory tree.
/// While set, both the FICLONE ioctl and copy_file_range are skipped.
has_seen_exdev: bool = false,
/// Set when the FICLONE ioctl failed with anything other than EXDEV or EINTR;
/// later copies using this state skip the ioctl.
has_ioctl_ficlone_failed: bool = false,
/// Set when copy_file_range failed with anything other than EINTR;
/// later calls using this state go straight to sendfile.
has_copy_file_range_failed: bool = false,
/// Set when sendfile failed with anything other than EINTR;
/// later calls using this state go straight to the read/write loop.
has_sendfile_failed: bool = false,
/// Unused padding bits that fill the struct up to its `u8` backing integer.
_: u4 = 0,
};
/// Field-less stand-in used as `CopyFileState` on platforms other than Linux.
const EmptyCopyFileState = struct {};
/// State threaded through `copyFileWithState`: the Linux failure-flag struct on
/// Linux, an empty struct everywhere else. Both can be created with `.{}`.
pub const CopyFileState = if (Environment.isLinux) LinuxCopyFileState else EmptyCopyFileState;
/// Result type of the whole-file copy entry points: success, or a system error.
const CopyFileReturnType = bun.sys.Maybe(void);
/// Copy the contents of `in` to `out` using the cheapest mechanism available
/// on the target OS.
///
/// - macOS: `fcopyfile` with `COPYFILE.DATA`. `OPNOTSUPP` falls through to the
///   read/write loop at the bottom; any other failure is returned.
/// - Linux: one `ioctl_ficlone` attempt (when it is still considered usable),
///   then repeated `copyFileRange` calls until one reports 0 bytes.
///   `copyFileRange` itself falls back to sendfile and then read/write.
/// - Windows: `CopyFileW` on the two paths.
/// - Everything else: `copyFileReadWriteLoop` until it reports 0 bytes.
///
/// `copy_file_state` is only read and written by the Linux branch. It records
/// which mechanisms already failed so that a caller reusing the same state
/// for many files can skip them.
///
/// Returns `success`, or the first system error reported by the mechanism
/// that was in use when the copy failed.
pub fn copyFileWithState(in: InputType, out: InputType, copy_file_state: *CopyFileState) CopyFileReturnType {
    if (comptime Environment.isMac) {
        const rc = posix.system.fcopyfile(in.native(), out.native(), null, posix.system.COPYFILE{ .DATA = true });
        switch (posix.errno(rc)) {
            .SUCCESS => return CopyFileReturnType.success,
            // The source file is not a directory, symbolic link, or regular file.
            // Try with the fallback path before giving up.
            .OPNOTSUPP => {},
            else => return CopyFileReturnType.errnoSys(rc, .copyfile).?,
        }
    }

    if (comptime Environment.isLinux) {
        if (can_use_ioctl_ficlone() and !copy_file_state.has_seen_exdev and !copy_file_state.has_ioctl_ficlone_failed) {
            // We only check once if the ioctl is supported, and cache the result.
            // EXT4 does not support FICLONE.
            const rc = bun.linux.ioctl_ficlone(out, in);
            // the ordering is flipped but it is consistent with other system calls.
            bun.sys.syslog("ioctl_ficlone({f}, {f}) = {d}", .{ in, out, rc });
            switch (bun.sys.getErrno(rc)) {
                .SUCCESS => return CopyFileReturnType.success,
                .XDEV => {
                    copy_file_state.has_seen_exdev = true;
                },
                // Don't worry about EINTR here.
                .INTR => {},
                .ACCES, .BADF, .INVAL, .OPNOTSUPP, .NOSYS, .PERM => {
                    // Besides flagging this state, this also turns the ioctl
                    // off process-wide via the cached support flag.
                    debug("ioctl_ficlonerange is NOT supported", .{});
                    can_use_ioctl_ficlone_.store(-1, .monotonic);
                    copy_file_state.has_ioctl_ficlone_failed = true;
                },
                else => {
                    // Failed for some other reason
                    copy_file_state.has_ioctl_ficlone_failed = true;
                },
            }
        }

        // The clone did not complete the copy. Move the data in chunks instead;
        // `copyFileRange` prefers copy_file_range (works at the FS level and is
        // the most efficient method, if available) and degrades from there.
        while (true) {
            // The kernel checks the u64 value `offset+count` for overflow, use
            // a 32 bit value so that the syscall won't return EINVAL except for
            // impossibly large files (> 2^64-1 - 2^32-1).
            const amt = switch (copyFileRange(in.native(), out.native(), math.maxInt(i32) - 1, 0, copy_file_state)) {
                .result => |a| a,
                .err => |err| return .{ .err = err },
            };
            // Terminate when no data was copied
            if (amt == 0) break;
        }
        return CopyFileReturnType.success;
    }

    if (comptime Environment.isWindows) {
        if (CopyFileReturnType.errnoSys(bun.windows.CopyFileW(in.ptr, out.ptr, 0), .copyfile)) |err| {
            return err;
        }
        return CopyFileReturnType.success;
    }

    // Generic fallback (also reached on macOS after OPNOTSUPP): pump chunks
    // through a userspace buffer until a chunk comes back empty.
    while (true) {
        switch (copyFileReadWriteLoop(in.native(), out.native(), math.maxInt(i32) - 1)) {
            .err => |err| return .{ .err = err },
            .result => |amt| {
                if (amt == 0) break;
            },
        }
    }
    return CopyFileReturnType.success;
}
/// Copy `in` to `out` with a fresh, throwaway `CopyFileState`.
/// Nothing learned about failing mechanisms is carried over to later calls;
/// use `copyFileWithState` directly to reuse the state across many files.
pub fn copyFile(in: InputType, out: InputType) CopyFileReturnType {
    var fresh_state = CopyFileState{};
    return copyFileWithState(in, out, &fresh_state);
}
/// Process-wide cache for copy_file_range availability:
/// 0 = not determined yet, 1 = usable, -1 = disabled or unsupported.
var can_use_copy_file_range = std.atomic.Value(i32).init(0);
/// Mark copy_file_range as unavailable for the rest of the process by storing
/// -1 in the cached flag. Does nothing when not building for Linux.
pub inline fn disableCopyFileRangeSyscall() void {
    if (comptime Environment.isLinux) {
        can_use_copy_file_range.store(-1, .monotonic);
    }
}
/// Report whether copy_file_range should be attempted.
///
/// The answer is cached in `can_use_copy_file_range`. On the first call
/// (cached value 0) it is decided from the
/// `BUN_CONFIG_DISABLE_COPY_FILE_RANGE` setting and then from the kernel
/// version, which must be at least 4.5.
pub fn canUseCopyFileRangeSyscall() bool {
    const cached = can_use_copy_file_range.load(.monotonic);
    if (cached != 0) return cached == 1;

    // This flag mostly exists to make other code more easily testable.
    if (bun.env_var.BUN_CONFIG_DISABLE_COPY_FILE_RANGE.get()) {
        debug("copy_file_range is disabled by BUN_CONFIG_DISABLE_COPY_FILE_RANGE", .{});
        can_use_copy_file_range.store(-1, .monotonic);
        return false;
    }

    const kernel = Platform.kernelVersion();
    const supported = kernel.orderWithoutTag(.{ .major = 4, .minor = 5 }).compare(.gte);
    if (supported) {
        debug("copy_file_range is supported", .{});
    } else {
        debug("copy_file_range is NOT supported", .{});
    }

    const verdict: i32 = if (supported) 1 else -1;
    can_use_copy_file_range.store(verdict, .monotonic);
    return supported;
}
/// Process-wide cache for FICLONE ioctl availability:
/// 0 = not determined yet, 1 = usable, -1 = disabled or unsupported.
pub var can_use_ioctl_ficlone_ = std.atomic.Value(i32).init(0);
/// Mark the FICLONE ioctl as unavailable for the rest of the process by
/// storing -1 in the cached flag. Does nothing when not building for Linux.
pub inline fn disable_ioctl_ficlone() void {
    if (comptime Environment.isLinux) {
        can_use_ioctl_ficlone_.store(-1, .monotonic);
    }
}
/// Report whether the FICLONE ioctl should be attempted.
///
/// The answer is cached in `can_use_ioctl_ficlone_`. On the first call
/// (cached value 0) it is decided from the
/// `BUN_CONFIG_DISABLE_ioctl_ficlonerange` setting and then from the kernel
/// version, which must be at least 4.5.
pub fn can_use_ioctl_ficlone() bool {
    switch (can_use_ioctl_ficlone_.load(.monotonic)) {
        // Not decided yet: fall through to the probing below.
        0 => {},
        else => |known| return known == 1,
    }

    // This flag mostly exists to make other code more easily testable.
    if (bun.env_var.BUN_CONFIG_DISABLE_ioctl_ficlonerange.get()) {
        debug("ioctl_ficlonerange is disabled by BUN_CONFIG_DISABLE_ioctl_ficlonerange", .{});
        can_use_ioctl_ficlone_.store(-1, .monotonic);
        return false;
    }

    const kernel = Platform.kernelVersion();
    const new_enough = kernel.orderWithoutTag(.{ .major = 4, .minor = 5 }).compare(.gte);
    if (!new_enough) {
        debug("ioctl_ficlonerange is NOT supported", .{});
        can_use_ioctl_ficlone_.store(-1, .monotonic);
        return false;
    }

    debug("ioctl_ficlonerange is supported", .{});
    can_use_ioctl_ficlone_.store(1, .monotonic);
    return true;
}
/// Move up to `len` bytes from `in` to `out` with the best mechanism that
/// has not failed yet, and return how many bytes were transferred.
///
/// Mechanisms are tried in this order:
/// 1. `copy_file_range` — skipped when it is unavailable process-wide, or when
///    `copy_file_state` has already seen EXDEV or a copy_file_range failure.
/// 2. `sendfile` — skipped once `copy_file_state.has_sendfile_failed` is set.
/// 3. `copyFileReadWriteLoop`.
///
/// EINTR retries the same syscall. Any other failure marks that mechanism as
/// failed in `copy_file_state` and moves on to the next one, so the only
/// errors returned come from the final read/write step. `flags` is passed to
/// `copy_file_range` unchanged.
pub fn copyFileRange(in: fd_t, out: fd_t, len: usize, flags: u32, copy_file_state: *CopyFileState) Maybe(usize) {
    const may_try_copy_file_range = canUseCopyFileRangeSyscall() and
        !copy_file_state.has_seen_exdev and
        !copy_file_state.has_copy_file_range_failed;

    if (may_try_copy_file_range) {
        while (true) {
            const rc = std.os.linux.copy_file_range(in, null, out, null, len, flags);
            bun.sys.syslog("copy_file_range({d}, {d}, {d}) = {d}", .{ in, out, len, rc });
            switch (bun.sys.getErrno(rc)) {
                .SUCCESS => return .{ .result = @intCast(rc) },
                .INTR => continue,
                // support for cross-filesystem copy added in Linux 5.3
                // and even then, it is frequently not supported.
                .XDEV => {
                    copy_file_state.has_seen_exdev = true;
                },
                // syscall added in Linux 4.5, use fallback
                .OPNOTSUPP, .NOSYS => {
                    debug("copy_file_range is NOT supported", .{});
                    can_use_copy_file_range.store(-1, .monotonic);
                },
                // EINVAL (these may not be regular files) or any other
                // failure: just try the fallback.
                else => {},
            }
            // Every failure except EINTR ends up here.
            copy_file_state.has_copy_file_range_failed = true;
            break;
        }
    }

    while (!copy_file_state.has_sendfile_failed) {
        const rc = std.os.linux.sendfile(@intCast(out), @intCast(in), null, len);
        bun.sys.syslog("sendfile({d}, {d}, {d}) = {d}", .{ in, out, len, rc });
        switch (bun.sys.getErrno(rc)) {
            .SUCCESS => return .{ .result = @intCast(rc) },
            .INTR => continue,
            // This shouldn't happen?
            .XDEV => {
                copy_file_state.has_seen_exdev = true;
            },
            // EINVAL (these may not be regular files), OPNOTSUPP/NOSYS (they
            // might not support it) or any other failure: fall back to the
            // read-write loop.
            else => {},
        }
        // Every failure except EINTR ends up here.
        copy_file_state.has_sendfile_failed = true;
        break;
    }

    return copyFileReadWriteLoop(in, out, len);
}
/// Copy one chunk from `in` to `out` through a 32 KiB stack buffer.
///
/// Performs a single read of at most `min(len, 32 KiB)` bytes, then writes
/// until everything that was read has been written. Returns the number of
/// bytes read, or 0 when the read returned no data. The first read or write
/// error is returned as `.err`.
///
/// NOTE(review): if a write reports 0 bytes, the count written so far is
/// returned even though more was already read from `in`. When that count is 0
/// the loops in `copyFileWithState` treat it as end of input — confirm this is
/// the intended handling.
pub fn copyFileReadWriteLoop(
    in: fd_t,
    out: fd_t,
    len: usize,
) Maybe(usize) {
    var chunk: [8 * 4096]u8 = undefined;
    const want = @min(chunk.len, len);

    const got = switch (bun.sys.read(.fromNative(in), chunk[0..want])) {
        .result => |n| n,
        .err => |err| return .{ .err = err },
    };
    if (got == 0) return .{ .result = 0 };

    var flushed: usize = 0;
    while (flushed < got) {
        const wrote = switch (bun.sys.write(.fromNative(out), chunk[flushed..got])) {
            .result => |n| n,
            .err => |err| return .{ .err = err },
        };
        if (wrote == 0) return .{ .result = flushed };
        flushed += wrote;
    }
    return .{ .result = got };
}
const debug = bun.Output.scoped(.copy_file, .hidden);
const bun = @import("bun");
const Environment = bun.Environment;
const Maybe = bun.sys.Maybe;
const Platform = bun.analytics.GenerateHeader.GeneratePlatform;
const std = @import("std");
const math = std.math;
const posix = std.posix;
const fd_t = std.posix.fd_t;