mirror of
https://github.com/oven-sh/bun
synced 2026-02-16 05:42:43 +00:00
* initial glob impl * Add `Bun.globMatch` * Glob boilerplate setup * Experiment with Rust glob implementation * Rust impl is slow revert * Setup glob walking * Basic glob walker working * Fix segfault * Recursive directory traversal * Fix glob match non-ascii * Make faster lil bit * use arena * ASCII fast path * Experiment with packed codepoint cursor Results in ~4% perf boost if the glob pattern needs to create/manipulate cursors (for example when the pattern uses braces) * Try converting to u32 array Made it pretty slow * Lazily create codepoint buffer * Different walk algorithm * Fast path optimizations * Add `dot` option to `Glob` * . * Fix some bugs * Fix bug, clean up lil bit * Windows fix * Non absolute paths * use specific version of fast-glob for benchmarks and tests * . * Fix some stuff * Fix more stuff * Add `hasPendingActivity()` to glob * accident * Symlinks * fast-glob e2e tests * remove * woops * Fix relative paths * Fix absolute * add test for `onlyFiles` * Fix invalid surrogate pairs problem * Rename: `match/matchSync` -> `scan/scanSync` and `matchString` -> `match` * forgot to close cwd fd * Update types * Add stress test * Port `micromatch` / `glob-match` / `globlin` tests * fix stale reference arena thing * stupid bug * Add builtins to classes code generator and add `Glob.scanIter()` * all iterables * generate fixtures, remove from git * fix test * Fix * woops on test * Fix stuff licenses license `has_pending_activity` to usize cwd threadSafe fix atomic compile errors `GlobWalker` own `cwd` Fix windows path and absolute test stuff * Fixes * Fix stuff * Use Syscall.close * Use private symbols for underlying scan functions to preevent misuse * Update types * Fix build for zig * Fix tests * Fix more tests * Prevent these tests from GC'ing too much * Make this benchmark work in Node and Bun * Fix memory leak * Add leak test * Fix windows * comment about arena allocator use for glob walker * Make leak test run in separate process * Iterator api for 
glob * GlobWalker.Iterator * fix leak test * Remove old impl * filter functions wip start * stuff * wip lockfile use glob * glob working with lockfile * revert lockfile changes * Update bun.lockb * Manually set to cwd to prevent test failing on linux CI --------- Co-authored-by: Jarred Sumner <jarred@jarredsumner.com> Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
378 lines
14 KiB
Zig
378 lines
14 KiB
Zig
// This is copied from std.fs.IterableDir.Iterator
|
|
// The differences are:
|
|
// - it returns errors in the expected format
|
|
// - doesn't mark BADF as unreachable
|
|
// - It uses PathString instead of []const u8
|
|
|
|
const builtin = @import("builtin");
|
|
const std = @import("std");
|
|
const os = std.os;
|
|
|
|
const Dir = std.fs.Dir;
|
|
const JSC = @import("root").bun.JSC;
|
|
const PathString = JSC.PathString;
|
|
const bun = @import("root").bun;
|
|
|
|
const IteratorError = error{ AccessDenied, SystemResources } || os.UnexpectedError;
|
|
const mem = std.mem;
|
|
const strings = @import("root").bun.strings;
|
|
const Maybe = JSC.Maybe;
|
|
const File = std.fs.File;
|
|
/// A single directory entry produced by the platform iterators in this file.
pub const IteratorResult = struct {
    /// Name of the entry. The iterators build this from a slice into their own
    /// internal buffer, so it is only usable until the next call to `next`.
    name: PathString,

    /// File type of the entry, mapped from the platform's directory record.
    kind: Entry.Kind,
};
|
|
|
|
const Result = Maybe(?IteratorResult);
|
|
|
|
const Entry = JSC.Node.Dirent;
|
|
|
|
pub const Iterator = switch (builtin.os.tag) {
|
|
.macos, .ios, .freebsd, .netbsd, .dragonfly, .openbsd, .solaris => struct {
    /// Directory whose `fd` is handed to the kernel on every refill.
    dir: Dir,
    /// Position value passed by pointer to `__getdirentries64`.
    seek: i64,
    /// Raw directory records; they are decoded through `align(1)` pointers.
    buf: [8192]u8, // TODO align(@alignOf(os.system.dirent)),
    /// Byte offset in `buf` of the next record to decode.
    index: usize,
    /// Number of bytes in `buf` that the last refill reported as valid.
    end_index: usize,

    const Self = @This();

    pub const Error = IteratorError;

    /// Memory such as file names referenced in this returned entry becomes invalid
    /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized.
    ///
    /// Declared `pub` so that it has the same visibility as `next` in the
    /// other platform branches of `Iterator`.
    pub const next = switch (builtin.os.tag) {
        .macos, .ios => nextDarwin,
        // .freebsd, .netbsd, .dragonfly, .openbsd => nextBsd,
        // .solaris => nextSolaris,
        else => @compileError("unimplemented"),
    };

    /// Returns the next entry, `null` once the directory is exhausted, or the
    /// error produced by `Result.errnoSys` when the refill syscall fails.
    /// Entries named "." or "..", and entries whose `d_ino` is 0, are skipped.
    fn nextDarwin(self: *Self) Result {
        while (true) {
            // Refill the buffer once every buffered record has been consumed.
            if (self.index >= self.end_index) {
                const bytes_read = os.system.__getdirentries64(
                    self.dir.fd,
                    &self.buf,
                    self.buf.len,
                    &self.seek,
                );

                // Zero bytes means there is nothing left to enumerate.
                if (bytes_read == 0) return Result{ .result = null };

                if (bytes_read < 0) {
                    if (Result.errnoSys(bytes_read, .getdirentries64)) |err| {
                        return err;
                    }
                }

                self.index = 0;
                self.end_index = @as(usize, @intCast(bytes_read));
            }

            const record = @as(*align(1) os.system.dirent, @ptrCast(&self.buf[self.index]));
            // Advance past this record before deciding whether to report it.
            self.index += record.reclen();

            const entry_name = @as([*]u8, @ptrCast(&record.d_name))[0..record.d_namlen];

            const is_dot_entry = strings.eqlComptime(entry_name, ".") or strings.eqlComptime(entry_name, "..");
            if (is_dot_entry or (record.d_ino == 0)) {
                continue;
            }

            const kind: Entry.Kind = switch (record.d_type) {
                os.DT.BLK => .block_device,
                os.DT.CHR => .character_device,
                os.DT.DIR => .directory,
                os.DT.FIFO => .named_pipe,
                os.DT.LNK => .sym_link,
                os.DT.REG => .file,
                os.DT.SOCK => .unix_domain_socket,
                os.DT.WHT => .whiteout,
                else => .unknown,
            };

            return .{
                .result = IteratorResult{
                    .name = PathString.init(entry_name),
                    .kind = kind,
                },
            };
        }
    }
},
|
|
|
|
.linux => struct {
    /// Directory whose `fd` is passed to `getdents64`.
    dir: Dir,
    // The alignment expression is guarded so that `linux.dirent64` is only
    // referenced when actually compiling for Linux; on Linux the guard has no effect.
    buf: [8192]u8 align(if (builtin.os.tag == .linux) @alignOf(linux.dirent64) else 1),
    /// Byte offset in `buf` of the next record to decode.
    index: usize,
    /// Number of bytes in `buf` that the last `getdents64` call returned.
    end_index: usize,

    const Self = @This();
    const linux = os.linux;

    pub const Error = IteratorError;

    /// Memory such as file names referenced in this returned entry becomes invalid
    /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized.
    ///
    /// Returns the next entry other than "." and "..", `null` when `getdents64`
    /// returns 0, or the error produced by `Result.errnoSys`.
    pub fn next(self: *Self) Result {
        while (true) {
            // Refill once the buffered records have all been consumed.
            if (self.index >= self.end_index) {
                const bytes_read = linux.getdents64(self.dir.fd, &self.buf, self.buf.len);
                if (Result.errnoSys(bytes_read, .getdents64)) |err| return err;
                if (bytes_read == 0) return .{ .result = null };
                self.index = 0;
                self.end_index = bytes_read;
            }

            const record = @as(*align(1) linux.dirent64, @ptrCast(&self.buf[self.index]));
            // Step over this record up front so that skipping it is a plain `continue`.
            self.index += record.reclen();

            // The name is read up to its 0 terminator.
            const entry_name = mem.sliceTo(@as([*:0]u8, @ptrCast(&record.d_name)), 0);

            // skip . and .. entries
            const is_dot_entry = strings.eqlComptime(entry_name, ".") or strings.eqlComptime(entry_name, "..");
            if (is_dot_entry) continue;

            const kind: Entry.Kind = switch (record.d_type) {
                linux.DT.BLK => .block_device,
                linux.DT.CHR => .character_device,
                linux.DT.DIR => .directory,
                linux.DT.FIFO => .named_pipe,
                linux.DT.LNK => .sym_link,
                linux.DT.REG => .file,
                linux.DT.SOCK => .unix_domain_socket,
                else => .unknown,
            };

            return .{
                .result = IteratorResult{
                    .name = PathString.init(entry_name),
                    .kind = kind,
                },
            };
        }
    }
},
|
|
.windows => struct {
    /// Directory whose handle (`fd`) is passed to `NtQueryDirectoryFile`.
    dir: Dir,
    /// Receives `FILE_BOTH_DIR_INFORMATION` records from the kernel.
    buf: [8192]u8 align(@alignOf(os.windows.FILE_BOTH_DIR_INFORMATION)),
    /// Byte offset in `buf` of the next record to decode.
    index: usize,
    /// Number of valid bytes in `buf`, taken from `io.Information`.
    end_index: usize,
    /// True until the first query has been issued; forwarded as the final
    /// (restart-scan) argument of `NtQueryDirectoryFile`.
    first: bool,
    /// Scratch space that `strings.fromWPath` writes the converted name into.
    // NOTE(review): a long UTF-16 name may need more than 256 bytes once
    // converted; confirm how `strings.fromWPath` behaves in that case.
    name_data: [256]u8,

    const Self = @This();

    pub const Error = IteratorError;

    /// Memory such as file names referenced in this returned entry becomes invalid
    /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized.
    ///
    /// Returns the next entry other than "." and "..", `null` when the listing
    /// is exhausted, or an error tagged with `.NtQueryDirectoryFile` when the
    /// query fails (`ENOTDIR` for `STATUS_INVALID_PARAMETER`).
    pub fn next(self: *Self) Result {
        while (true) {
            const w = os.windows;
            if (self.index >= self.end_index) {
                var io: w.IO_STATUS_BLOCK = undefined;
                const rc = w.ntdll.NtQueryDirectoryFile(
                    self.dir.fd,
                    null,
                    null,
                    null,
                    &io,
                    &self.buf,
                    self.buf.len,
                    .FileBothDirectoryInformation,
                    w.FALSE,
                    null,
                    if (self.first) @as(w.BOOLEAN, w.TRUE) else @as(w.BOOLEAN, w.FALSE),
                );
                self.first = false;

                // Check the status before touching `io`: it was declared
                // `undefined`, so `io.Information` is only meaningful once the
                // call is known to have succeeded. Reading it first could turn
                // a failure into a silent end-of-directory, or leave
                // `end_index` pointing at stale buffer contents.

                // If the handle is not a directory, we'll get STATUS_INVALID_PARAMETER.
                if (rc == .INVALID_PARAMETER) {
                    return .{
                        .err = .{
                            .errno = @as(bun.sys.Error.Int, @truncate(@intFromEnum(bun.C.SystemErrno.ENOTDIR))),
                            .syscall = .NtQueryDirectoryFile,
                        },
                    };
                }

                if (rc == .NO_MORE_FILES) {
                    // Keep the buffer marked as empty.
                    self.index = 0;
                    self.end_index = 0;
                    return .{ .result = null };
                }

                if (rc != .SUCCESS) {
                    if ((bun.windows.Win32Error.fromNTStatus(rc).toSystemErrno())) |errno| {
                        return .{
                            .err = .{
                                .errno = @truncate(@intFromEnum(errno)),
                                .syscall = .NtQueryDirectoryFile,
                            },
                        };
                    }

                    // No errno mapping was available for this status.
                    return .{
                        .err = .{
                            .errno = @truncate(@intFromEnum(bun.C.SystemErrno.EUNKNOWN)),
                            .syscall = .NtQueryDirectoryFile,
                        },
                    };
                }

                // Success with no bytes written: nothing more to report.
                if (io.Information == 0) return .{ .result = null };

                self.index = 0;
                self.end_index = io.Information;
            }

            const dir_info: *w.FILE_BOTH_DIR_INFORMATION = @ptrCast(@alignCast(&self.buf[self.index]));
            if (dir_info.NextEntryOffset != 0) {
                self.index += dir_info.NextEntryOffset;
            } else {
                // A zero offset marks the last record; moving `index` to the end
                // of the buffer forces a refill on the next iteration.
                self.index = self.buf.len;
            }

            // `FileNameLength` is halved to get the number of u16 code units.
            const name_utf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];

            if (mem.eql(u16, name_utf16le, &[_]u16{'.'}) or mem.eql(u16, name_utf16le, &[_]u16{ '.', '.' }))
                continue;

            // Trust that Windows gives us valid UTF-16LE
            const name_utf8 = strings.fromWPath(self.name_data[0..], name_utf16le);

            // The directory attribute is tested first, so an entry carrying both
            // the directory and reparse-point attributes is reported as a directory.
            const kind = blk: {
                const attrs = dir_info.FileAttributes;
                if (attrs & w.FILE_ATTRIBUTE_DIRECTORY != 0) break :blk Entry.Kind.directory;
                if (attrs & w.FILE_ATTRIBUTE_REPARSE_POINT != 0) break :blk Entry.Kind.sym_link;
                break :blk Entry.Kind.file;
            };

            return .{
                .result = IteratorResult{
                    .name = PathString.init(name_utf8),
                    .kind = kind,
                },
            };
        }
    }
},
|
|
.wasi => struct {
    /// Directory whose `fd` is passed to `fd_readdir`.
    dir: Dir,
    /// Raw directory records; they are decoded through `align(1)` pointers.
    buf: [8192]u8, // TODO align(@alignOf(os.wasi.dirent_t)),
    /// Resume position passed to `fd_readdir`; updated from each record's `d_next`.
    cookie: u64,
    /// Byte offset in `buf` of the next record to decode.
    index: usize,
    /// Number of bytes `fd_readdir` reported as used in `buf`.
    end_index: usize,

    const Self = @This();

    pub const Error = IteratorError;

    /// Memory such as file names referenced in this returned entry becomes invalid
    /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized.
    // NOTE(review): the declared return type is `Result`, but the body below
    // still returns `error.*`, `null` and a bare `IteratorResult` whose `name`
    // is a byte slice. This looks like it was not yet ported to the `Maybe`
    // convention used by the other platform branches; confirm before
    // targeting WASI.
    pub fn next(self: *Self) Result {
        // We intentionally use fd_readdir even when linked with libc,
        // since its implementation is exactly the same as below,
        // and we avoid the code complexity here.
        const w = os.wasi;
        while (true) {
            // Refill once the buffered records have all been consumed.
            if (self.index >= self.end_index) {
                var bufused: usize = undefined;
                switch (w.fd_readdir(self.dir.fd, &self.buf, self.buf.len, self.cookie, &bufused)) {
                    .SUCCESS => {},
                    .BADF => unreachable, // Dir is invalid or was opened without iteration ability
                    .FAULT => unreachable,
                    .NOTDIR => unreachable,
                    .INVAL => unreachable,
                    .NOTCAPABLE => return error.AccessDenied,
                    else => |err| return os.unexpectedErrno(err),
                }
                if (bufused == 0) return null;
                self.index = 0;
                self.end_index = bufused;
            }

            const record = @as(*align(1) w.dirent_t, @ptrCast(&self.buf[self.index]));
            // The name bytes immediately follow the fixed-size record header.
            const name_start = self.index + @sizeOf(w.dirent_t);
            const name_end = name_start + record.d_namlen;
            const name = mem.span(self.buf[name_start..name_end]);

            // The next record begins right after this record's name.
            self.index = name_end;
            self.cookie = record.d_next;

            // skip . and .. entries
            const is_dot_entry = strings.eqlComptime(name, ".") or strings.eqlComptime(name, "..");
            if (is_dot_entry) continue;

            const entry_kind = switch (record.d_type) {
                .BLOCK_DEVICE => Entry.Kind.block_device,
                .CHARACTER_DEVICE => Entry.Kind.character_device,
                .DIRECTORY => Entry.Kind.directory,
                .SYMBOLIC_LINK => Entry.Kind.sym_link,
                .REGULAR_FILE => Entry.Kind.file,
                .SOCKET_STREAM, .SOCKET_DGRAM => Entry.Kind.unix_domain_socket,
                else => Entry.Kind.unknown,
            };

            return IteratorResult{
                .name = name,
                .kind = entry_kind,
            };
        }
    }
},
|
|
else => @compileError("unimplemented"),
|
|
};
|
|
|
|
/// Thin wrapper around the platform-specific `Iterator`, as returned by `iterate`.
pub const WrappedIterator = struct {
    /// The platform iterator that does the actual work.
    iter: Iterator,

    const Self = @This();

    pub const Error = IteratorError;

    /// Forwards to the wrapped iterator's `next` and returns its result unchanged.
    pub inline fn next(self: *Self) Result {
        const inner = &self.iter;
        return inner.next();
    }
};
|
|
|
|
/// Creates a `WrappedIterator` over `self`, positioned before the first entry.
/// Building the iterator only initializes its fields; `next` is not called.
pub fn iterate(self: Dir) WrappedIterator {
    const inner = _iterate(self);
    return .{ .iter = inner };
}
|
|
|
|
/// Builds the platform-specific `Iterator` for `self` in its initial state:
/// `index` and `end_index` are both zero so the first `next` call refills the
/// buffer, and the buffers themselves are left undefined until then.
fn _iterate(self: Dir) Iterator {
    return switch (builtin.os.tag) {
        .macos,
        .ios,
        .freebsd,
        .netbsd,
        .dragonfly,
        .openbsd,
        .solaris,
        => Iterator{
            .dir = self,
            .seek = 0,
            .index = 0,
            .end_index = 0,
            .buf = undefined,
        },
        .linux, .haiku => Iterator{
            .dir = self,
            .index = 0,
            .end_index = 0,
            .buf = undefined,
        },
        .windows => Iterator{
            .dir = self,
            .index = 0,
            .end_index = 0,
            // The first query is the only one issued with `first` set.
            .first = true,
            .buf = undefined,
            .name_data = undefined,
        },
        .wasi => Iterator{
            .dir = self,
            .cookie = os.wasi.DIRCOOKIE_START,
            .index = 0,
            .end_index = 0,
            .buf = undefined,
        },
        else => @compileError("unimplemented"),
    };
}
|