Files
bun.sh/src/fs.zig
Jarred Sumner f7ed006a08 ok
2021-05-24 12:44:23 -07:00

769 lines
26 KiB
Zig

const std = @import("std");
usingnamespace @import("global.zig");
const sync = @import("sync.zig");
const alloc = @import("alloc.zig");
const expect = std.testing.expect;
const Mutex = sync.Mutex;
const Semaphore = sync.Semaphore;
const path_handler = @import("./resolver/resolve_path.zig");
const allocators = @import("./allocators.zig");
// pub const FilesystemImplementation = @import("fs_impl.zig");
// Per-thread scratch space in which DirEntry.get builds the lowercased copy of
// a query before looking it up; its length bounds the longest query it can hold.
threadlocal var scratch_lookup_buffer: [256]u8 = undefined;
// Compile-time capacities passed to the allocators.BSS* containers used in this file.
pub const Preallocate = struct {
pub const Counts = struct {
// Capacity used by FileSystem.DirnameStore and RealFS.EntriesOption.Map.
pub const dir_entry: usize = 1024;
// Capacity used by FileSystem.FilenameStore and DirEntry.EntryStore.
pub const files: usize = 2048;
};
};
pub const FileSystem = struct {
// Allocator given to init1; also handed to the stores and to Implementation.init.
allocator: *std.mem.Allocator,
// Root directory. init1 sets it from its argument, or "/project" when isBrowser,
// or the process working directory otherwise.
top_level_dir: string = "/",
// Platform-specific backend (see Implementation).
fs: Implementation,
// String stores created by init1.
dirname_store: *DirnameStore,
filename_store: *FilenameStore,
// The single FileSystem value that init1 fills in and returns a pointer to.
pub var instance: FileSystem = undefined;
pub const DirnameStore = allocators.BSSStringList(Preallocate.Counts.dir_entry, 256);
pub const FilenameStore = allocators.BSSStringList(Preallocate.Counts.files, 64);
// Errors named after the corresponding errno-style conditions.
pub const Error = error{
ENOENT,
EACCESS,
INVALID_NAME,
ENOTDIR,
};
// Fills in the global `instance` and returns a pointer to it.
//
// `top_level_dir` falls back to "/project" when `isBrowser` is set and to the
// process working directory (allocated with `allocator`) otherwise.
// `enable_watcher` is forwarded to the backend's init.
pub fn init1(allocator: *std.mem.Allocator, top_level_dir: ?string, enable_watcher: bool) !*FileSystem {
    var root: string = undefined;
    if (top_level_dir) |given| {
        root = given;
    } else if (isBrowser) {
        root = "/project";
    } else {
        root = try std.process.getCwdAlloc(allocator);
    }

    const self = &instance;
    self.* = FileSystem{
        .allocator = allocator,
        .top_level_dir = root,
        .fs = Implementation.init(allocator, root, enable_watcher),
        // .stats = std.StringHashMap(Stat).init(allocator),
        .dirname_store = DirnameStore.init(allocator),
        .filename_store = FilenameStore.init(allocator),
    };
    // Give the backend a way back to its owning FileSystem.
    self.fs.parent_fs = self;
    // Initialise the shared entry store; the returned value is not needed here.
    _ = DirEntry.EntryStore.init(allocator);
    return self;
}
pub const DirEntry = struct {
// Maps a lowercased file name to the index of its Entry inside EntryStore.
pub const EntryMap = std.StringHashMap(EntryStore.ListIndex);
// Shared storage for Entry values; addEntry appends here and keeps only the index.
pub const EntryStore = allocators.BSSList(Entry, Preallocate.Counts.files);
// Directory path recorded on every Entry created by addEntry.
dir: string,
// Lowercased name -> EntryStore index.
data: EntryMap,
// Records one directory-iteration result in this DirEntry.
//
// Directories become `Entry.Kind.dir`; regular files and symlinks become
// `Entry.Kind.file`; every other kind is ignored. The name is copied into
// FilenameStore and lowercased in place, and that lowercased copy is used both
// as the map key and as `Entry.base`.
//
// NOTE(review): because `base` is lowercased, the original on-disk casing is
// not retained anywhere; confirm this is intended for case-sensitive filesystems.
pub fn addEntry(dir: *DirEntry, entry: std.fs.Dir.Entry) !void {
    const entry_kind: Entry.Kind = switch (entry.kind) {
        .Directory => Entry.Kind.dir,
        // This might be wrong! (symlinks are provisionally treated as files)
        .SymLink, .File => Entry.Kind.file,
        else => return,
    };

    // entry.name only lives for the duration of the iteration
    const name = FileSystem.FilenameStore.editableSlice(try FileSystem.FilenameStore.instance.append(entry.name));
    for (entry.name) |c, i| {
        name[i] = std.ascii.toLower(c);
    }

    const is_symlink = entry.kind == .SymLink;
    // `string` (const slice) so the empty literal is a valid initial value and
    // the type matches Entry.Cache.symlink.
    var symlink: string = "";
    if (is_symlink) {
        symlink = name;
    }

    const index = try EntryStore.instance.append(Entry{
        .base = name,
        .dir = dir.dir,
        .mutex = Mutex.init(),
        // Call "stat" lazily for performance. The "@material-ui/icons" package
        // contains a directory with over 11,000 entries in it and running "stat"
        // for each entry was a big performance issue for that package.
        .need_stat = is_symlink,
        .cache = Entry.Cache{
            .symlink = symlink,
            .kind = entry_kind,
        },
    });
    try dir.data.put(name, index);
}
// Re-points this DirEntry, and every Entry it references, at `dir`.
//
// The map stores EntryStore indices rather than entries, so each one is
// resolved through EntryStore before its `dir` field is written. Indices that
// no longer resolve are skipped.
pub fn updateDir(i: *DirEntry, dir: string) void {
    i.dir = dir;
    var iter = i.data.iterator();
    while (iter.next()) |entry| {
        if (EntryStore.instance.at(entry.value)) |stored| {
            stored.dir = dir;
        }
    }
}
// Returns a DirEntry for `dir` that lists no files; same result as `init`.
pub fn empty(dir: string, allocator: *std.mem.Allocator) DirEntry {
    return init(dir, allocator);
}
// Returns a DirEntry for `dir` whose name map is empty and backed by `allocator`.
pub fn init(dir: string, allocator: *std.mem.Allocator) DirEntry {
    const no_entries = EntryMap.init(allocator);
    return DirEntry{ .data = no_entries, .dir = dir };
}
// Error payload stored in EntriesOption.err when a directory could not be read.
// readDirectoryError sets both fields to the same error value.
pub const Err = struct {
original_err: anyerror,
canonical_error: anyerror,
};
// Frees the directory string, calls deinit on every Entry this DirEntry
// references, then releases the name map. All frees go through the map's allocator.
pub fn deinit(d: *DirEntry) void {
    const allocator = d.data.allocator;
    allocator.free(d.dir);

    var remaining = d.data.iterator();
    while (remaining.next()) |slot| {
        const stored = EntryStore.instance.at(slot.value).?;
        stored.deinit(allocator);
    }

    d.data.deinit();
}
// Looks up `_query` case-insensitively.
//
// Returns null for an empty query, for a query longer than the per-thread
// scratch buffer (it cannot be lowercased there, and previously this was only
// guarded by an assert that is compiled out in release builds), and when no
// entry matches. Otherwise returns the Entry, with `diff_case` set when the
// stored name differs from the lowercased query.
//
// NOTE(review): addEntry stores `base` already lowercased, so the comparison
// below looks like it can never differ; confirm whether `diff_case` is
// expected to fire.
pub fn get(entry: *DirEntry, _query: string) ?Entry.Lookup {
    if (_query.len == 0) return null;
    if (_query.len > scratch_lookup_buffer.len) return null;

    const query = scratch_lookup_buffer[0.._query.len];
    for (_query) |c, i| {
        query[i] = std.ascii.toLower(c);
    }

    const result_index = entry.data.get(query) orelse return null;
    const result = EntryStore.instance.at(result_index) orelse return null;
    if (!strings.eql(result.base, query)) {
        return Entry.Lookup{ .entry = result, .diff_case = Entry.Lookup.DifferentCase{
            .dir = entry.dir,
            .query = _query,
            .actual = result.base,
        } };
    }
    return Entry.Lookup{ .entry = result, .diff_case = null };
}
};
pub const Entry = struct {
// Lazily refreshed kind/symlink information (see `kind` and `symlink`).
cache: Cache = Cache{},
// Directory containing this entry.
dir: string,
// File name within `dir`; addEntry stores it lowercased.
base: string,
mutex: Mutex,
// True while `cache` still has to be refreshed from the filesystem.
need_stat: bool = true,
// Result of DirEntry.get: the entry plus optional casing-mismatch details.
pub const Lookup = struct {
entry: *Entry,
diff_case: ?DifferentCase,
// Describes a lookup whose stored name differs from the query.
pub const DifferentCase = struct {
dir: string,
query: string,
actual: string,
};
};
// Frees the strings referenced by this entry and then the entry itself, all
// through `allocator`. The entry must not be used afterwards.
// NOTE(review): addEntry takes `base` from FilenameStore and places the Entry
// in EntryStore rather than allocating from `allocator`; confirm that
// freeing/destroying through `allocator` is valid for those values.
pub fn deinit(e: *Entry, allocator: *std.mem.Allocator) void {
allocator.free(e.base);
allocator.free(e.dir);
allocator.free(e.cache.symlink);
// Must come last: `e` is read by the frees above.
allocator.destroy(e);
}
// What is known about an entry: its symlink string (empty when none was
// recorded) and whether it is a file or a directory.
pub const Cache = struct {
symlink: string = "",
kind: Kind = Kind.file,
};
// Coarse entry classification; anything that is not a directory is `file`.
pub const Kind = enum {
dir,
file,
};
// Refreshes `entry.cache` from the filesystem the first time it is needed.
//
// If the filesystem query fails (for example the file was removed after the
// directory was listed), the cache recorded at listing time is kept; the
// previous `catch unreachable` made that case undefined behaviour.
//
// NOTE(review): entry.mutex is not taken here (the lock/unlock calls were
// already commented out); confirm callers serialize access.
fn refreshCache(entry: *Entry, fs: *Implementation) void {
    if (!entry.need_stat) return;
    entry.need_stat = false;
    if (fs.kind(entry.dir, entry.base)) |fresh| {
        entry.cache = fresh;
    } else |_| {
        // Deliberately ignored: fall back to the cached value.
    }
}
// Returns whether this entry is a file or a directory, querying the
// filesystem on first use when `need_stat` is set.
pub fn kind(entry: *Entry, fs: *Implementation) Kind {
    entry.refreshCache(fs);
    return entry.cache.kind;
}
// Returns the cached symlink string for this entry (empty when none),
// querying the filesystem on first use when `need_stat` is set.
pub fn symlink(entry: *Entry, fs: *Implementation) string {
    entry.refreshCache(fs);
    return entry.cache.symlink;
}
};
// pub fn statBatch(fs: *FileSystemEntry, paths: []string) ![]?Stat {
// }
// pub fn stat(fs: *FileSystemEntry, path: string) !Stat {
// }
// pub fn readFile(fs: *FileSystemEntry, path: string) ?string {
// }
// pub fn readDir(fs: *FileSystemEntry, path: string) ?[]string {
// }
// Forwards `str` to path_handler.normalizeAndJoin with this filesystem's
// top_level_dir as the first argument; the call is forced inline.
pub fn normalize(f: *@This(), str: string) string {
return @call(.{ .modifier = .always_inline }, path_handler.normalizeAndJoin, .{ f.top_level_dir, .auto, str });
}
// Forwards `parts` to path_handler.normalizeAndJoinString with this
// filesystem's top_level_dir as the first argument; the call is forced inline.
// NOTE(review): the result presumably lives in a buffer owned by path_handler
// (joinAlloc copies it); confirm before holding on to it.
pub fn join(f: *@This(), parts: anytype) string {
return @call(.{ .modifier = .always_inline }, path_handler.normalizeAndJoinString, .{
f.top_level_dir,
parts,
.auto,
});
}
// Same as `join`, but returns a copy of the result allocated with `allocator`.
// The caller owns the returned slice.
pub fn joinAlloc(f: *@This(), allocator: *std.mem.Allocator, parts: anytype) !string {
    const owned = try allocator.dupe(u8, f.join(parts));
    return owned;
}
pub const RealFS = struct {
// Guards `entries`.
entries_mutex: Mutex = Mutex.init(),
// Cache of directory listings (or the error from listing them), keyed by directory.
entries: *EntriesOption.Map,
allocator: *std.mem.Allocator,
// When true, readDirectory/readDirectoryError bypass `entries` and return a
// pointer to a thread-local value instead.
do_not_cache_entries: bool = false,
// Bounds the number of concurrent filesystem operations.
limiter: Limiter,
// Per-path watch state; null when init was called with enable_watcher == false.
watcher: ?std.StringHashMap(WatchData) = null,
// Guards `watcher`.
watcher_mutex: Mutex = Mutex.init(),
// Base directory passed to the path_handler join helpers.
cwd: string,
// Back-pointer to the owning FileSystem; set by FileSystem.init1 after construction.
parent_fs: *FileSystem = undefined,
// Builds a RealFS that resolves relative paths against `cwd`.
// The watcher map is only created when `enable_watcher` is true; otherwise
// it stays null.
pub fn init(allocator: *std.mem.Allocator, cwd: string, enable_watcher: bool) RealFS {
    var real = RealFS{
        .entries = EntriesOption.Map.init(allocator),
        .allocator = allocator,
        .cwd = cwd,
        .limiter = Limiter.init(allocator),
    };
    if (enable_watcher) {
        real.watcher = std.StringHashMap(WatchData).init(allocator);
    }
    return real;
}
// Returned by ModKey.generate when a file's modification time cannot be
// trusted for change detection.
pub const ModKeyError = error{
    Unusable,
};
// Snapshot of the stat fields used to decide whether a file has changed.
pub const ModKey = struct {
    inode: std.fs.File.INode = 0,
    size: u64 = 0,
    mtime: i128 = 0,
    mode: std.fs.File.Mode = 0,

    // Opens the file at absolute `path`, stats it and returns its ModKey.
    //
    // Returns error.Unusable when the modification time is exactly zero or
    // lies in the future relative to the current clock; open/stat errors are
    // propagated unchanged.
    pub fn generate(fs: *RealFS, path: string) anyerror!ModKey {
        var file = try std.fs.openFileAbsolute(path, std.fs.File.OpenFlags{ .read = true });
        defer file.close();
        const stat = try file.stat();

        // We can't detect changes if the file system zeros out the modification time.
        // (Zero seconds and zero nanoseconds is the same as mtime == 0; the
        // previous test compared a constant against zero and never fired.)
        if (stat.mtime == 0) {
            return error.Unusable;
        }

        // Don't generate a modification key if the file is too new.
        // (The previous test compared `seconds` with itself.)
        const seconds = @divTrunc(stat.mtime, @as(@TypeOf(stat.mtime), std.time.ns_per_s));
        const now = std.time.nanoTimestamp();
        const now_seconds = @divTrunc(now, std.time.ns_per_s);
        if (seconds > now_seconds or (seconds == now_seconds and stat.mtime > now)) {
            return error.Unusable;
        }

        return ModKey{
            .inode = stat.inode,
            .size = stat.size,
            .mtime = stat.mtime,
            .mode = stat.mode,
            // .uid = stat.
        };
    }

    // NOTE(review): not referenced by `generate`; presumably a margin in
    // seconds for the "too new" check — confirm whether it should be applied.
    pub const SafetyGap = 3;
};
// Records a failed ModKey.generate for `path` in the watcher, if one exists.
// error.Unusable maps to `file_unusable_mod_key`; every other error maps to
// `file_missing`.
fn modKeyError(fs: *RealFS, path: string, err: anyerror) void {
    const watcher = if (fs.watcher) |*w| w else return;

    fs.watcher_mutex.lock();
    defer fs.watcher_mutex.unlock();

    const new_state: WatchData.State = switch (err) {
        error.Unusable => WatchData.State.file_unusable_mod_key,
        else => WatchData.State.file_missing,
    };
    var slot = watcher.getOrPutValue(path, WatchData{ .state = new_state }) catch unreachable;
    // Also overwrite the state of an entry that already existed.
    slot.value.state = new_state;
}
// Generates the ModKey for `path` while holding a limiter slot.
// On success the key is stored in the watcher (when enabled) and returned;
// on failure the error is recorded through modKeyError and then returned.
pub fn modKey(fs: *RealFS, path: string) anyerror!ModKey {
    fs.limiter.before();
    defer fs.limiter.after();

    if (ModKey.generate(fs, path)) |generated| {
        if (fs.watcher) |*watcher| {
            fs.watcher_mutex.lock();
            defer fs.watcher_mutex.unlock();
            var slot = watcher.getOrPutValue(path, WatchData{ .state = .file_has_mod_key, .mod_key = generated }) catch unreachable;
            // Refresh the key of an entry that already existed.
            slot.value.mod_key = generated;
        }
        return generated;
    } else |err| {
        fs.modKeyError(path, err);
        return err;
    }
}
// Per-path state kept in RealFS.watcher.
pub const WatchData = struct {
// Only referenced from commented-out code in readDirectory.
dir_entries: []string = &([_]string{}),
// Set by readFile to the bytes it just read.
file_contents: string = "",
// Set by modKey after a successful ModKey.generate.
mod_key: ModKey = ModKey{},
watch_mutex: Mutex = Mutex.init(),
state: State = State.none,
// What is currently known about the watched path.
pub const State = enum {
none,
dir_has_entries,
// Set by readDirectoryError.
dir_missing,
// Set by modKey on success.
file_has_mod_key,
// Set by readFile after a successful read.
file_need_mod_key,
// Set by readFileError and by modKeyError for errors other than Unusable.
file_missing,
// Set by modKeyError for error.Unusable.
file_unusable_mod_key,
};
};
// Cached outcome of listing a directory: either its entries or the error
// that prevented reading it.
pub const EntriesOption = union(Tag) {
entries: DirEntry,
err: DirEntry.Err,
pub const Tag = enum {
entries,
err,
};
// This custom map implementation:
// - Preallocates a fixed amount of directory name space
// - Doesn't store directory names which don't exist.
pub const Map = allocators.BSSMap(EntriesOption, Preallocate.Counts.dir_entry, false, 128);
};
// Limit the number of files open simultaneously to avoid ulimit issues
pub const Limiter = struct {
    // Number of slots handed to the semaphore.
    const max_open_files = 32;

    semaphore: Semaphore,

    // `allocator` is accepted but not used.
    pub fn init(allocator: *std.mem.Allocator) Limiter {
        return .{ .semaphore = Semaphore.init(max_open_files) };
    }

    // This will block if the number of open files is already at the limit
    pub fn before(limiter: *Limiter) void {
        limiter.semaphore.wait();
    }

    // Gives back the slot taken by `before`.
    pub fn after(limiter: *Limiter) void {
        limiter.semaphore.post();
    }
};
// Opens the directory at absolute path `unsafe_dir_string` for iteration,
// with sub-path access enabled and without following a final symlink.
// The caller is responsible for closing the returned handle.
pub fn openDir(fs: *RealFS, unsafe_dir_string: string) std.fs.File.OpenError!std.fs.Dir {
    const options = std.fs.Dir.OpenDirOptions{
        .iterate = true,
        .access_sub_paths = true,
        .no_follow = true,
    };
    const opened = try std.fs.openDirAbsolute(unsafe_dir_string, options);
    return opened;
}
// Lists `handle` into a new DirEntry labelled `_dir`, holding a limiter slot
// for the duration. On any error the partially built DirEntry is deinitialised
// before the error is returned.
fn readdir(
    fs: *RealFS,
    _dir: string,
    handle: std.fs.Dir,
) !DirEntry {
    fs.limiter.before();
    defer fs.limiter.after();

    var walker: std.fs.Dir.Iterator = handle.iterate();
    var listing = DirEntry.init(_dir, fs.allocator);
    errdefer listing.deinit();

    while (true) {
        const item = (try walker.next()) orelse break;
        try listing.addEntry(item);
    }
    return listing;
}
// Records that reading `dir` failed with `err` and returns the resulting
// EntriesOption.
//
// The watcher (when enabled) is told the directory is missing. With caching
// enabled the error is stored in `entries`; otherwise a pointer to the
// thread-local `temp_entries_option` is returned.
fn readDirectoryError(fs: *RealFS, dir: string, err: anyerror) !*EntriesOption {
    if (fs.watcher) |*watcher| {
        fs.watcher_mutex.lock();
        defer fs.watcher_mutex.unlock();
        try watcher.put(dir, WatchData{ .state = .dir_missing });
    }

    const failure = EntriesOption{
        .err = DirEntry.Err{ .original_err = err, .canonical_error = err },
    };

    if (fs.do_not_cache_entries) {
        temp_entries_option = failure;
        return &temp_entries_option;
    }

    fs.entries_mutex.lock();
    defer fs.entries_mutex.unlock();
    var slot = try fs.entries.getOrPut(dir);
    const stored = try fs.entries.put(&slot, failure);
    return stored;
}
// Per-thread slot returned by readDirectory/readDirectoryError when
// do_not_cache_entries is set; each such call overwrites the previous value.
threadlocal var temp_entries_option: EntriesOption = undefined;
// Returns the listing (or cached failure) for directory `_dir`.
//
// - With caching enabled, a previously stored result is returned directly.
// - Otherwise the directory is read through `_handle`, or through a handle
//   opened (and closed) here when `_handle` is null.
// - A failure while listing is recorded via readDirectoryError and returned
//   as an `.err` option; if recording itself fails, that error is now
//   propagated instead of being asserted unreachable.
//
// `recursive` is currently unused. With do_not_cache_entries set, the returned
// pointer refers to a thread-local value that the next call overwrites.
pub fn readDirectory(fs: *RealFS, _dir: string, _handle: ?std.fs.Dir, recursive: bool) !*EntriesOption {
    var dir = _dir;
    var cache_result: ?allocators.Result = null;
    if (!fs.do_not_cache_entries) {
        fs.entries_mutex.lock();
        defer fs.entries_mutex.unlock();
        cache_result = try fs.entries.getOrPut(dir);
        if (cache_result.?.hasCheckedIfExists()) {
            if (fs.entries.atIndex(cache_result.?.index)) |cached_result| {
                return cached_result;
            }
        }
    }

    var handle = _handle orelse try fs.openDir(dir);
    // Only close handles opened here; a caller-supplied handle stays open.
    defer {
        if (_handle == null) {
            handle.close();
        }
    }

    // if we get this far, it's a real directory, so we can just store the dir name.
    if (_handle == null) {
        dir = try FilenameStore.instance.append(_dir);
    }

    // Cache miss: read the directory entries
    const entries = fs.readdir(
        dir,
        handle,
    ) catch |err| {
        return try fs.readDirectoryError(dir, err);
    };

    // if (fs.watcher) |*watcher| {
    //     fs.watcher_mutex.lock();
    //     defer fs.watcher_mutex.unlock();
    //     var _entries = watcher.iterator();
    //     const names = try fs.allocator.alloc([]const u8, _entries.len);
    //     for (_entries) |entry, i| {
    //         names[i] = try fs.allocator.dupe(u8, entry.key);
    //     }
    //     strings.sortAsc(names);
    //     try watcher.put(
    //         try fs.allocator.dupe(u8, dir),
    //         WatchData{ .dir_entries = names, .state = .dir_has_entries },
    //     );
    // }

    if (!fs.do_not_cache_entries) {
        fs.entries_mutex.lock();
        defer fs.entries_mutex.unlock();
        const result = EntriesOption{
            .entries = entries,
        };
        return try fs.entries.put(&cache_result.?, result);
    }

    temp_entries_option = EntriesOption{ .entries = entries };
    return &temp_entries_option;
}
// Marks `path` as `file_missing` in the watcher, if one exists.
// `err` is accepted but not inspected.
fn readFileError(fs: *RealFS, path: string, err: anyerror) void {
    const watcher = if (fs.watcher) |*w| w else return;

    fs.watcher_mutex.lock();
    defer fs.watcher_mutex.unlock();

    var slot = watcher.getOrPutValue(path, WatchData{ .state = .file_missing }) catch unreachable;
    // Also overwrite the state of an entry that already existed.
    slot.value.state = .file_missing;
}
// Reads the whole file at absolute `path` into memory allocated from
// `fs.allocator`, holding a limiter slot for the duration.
//
// `_size`, when given, is used as both the byte limit and the size hint so the
// end-position query can be skipped. Any failure is recorded through
// readFileError before being returned. On success the watcher (when enabled)
// is updated with the contents and the `file_need_mod_key` state.
pub fn readFile(fs: *RealFS, path: string, _size: ?usize) !File {
    fs.limiter.before();
    defer fs.limiter.after();

    const open_flags = std.fs.File.OpenFlags{ .read = true, .write = false };
    const file: std.fs.File = std.fs.openFileAbsolute(path, open_flags) catch |open_err| {
        fs.readFileError(path, open_err);
        return open_err;
    };
    defer file.close();

    // Skip the extra file.stat() call when possible
    var size = _size orelse (file.getEndPos() catch |size_err| {
        fs.readFileError(path, size_err);
        return size_err;
    });

    const file_contents: []u8 = file.readToEndAllocOptions(fs.allocator, size, size, @alignOf(u8), null) catch |read_err| {
        fs.readFileError(path, read_err);
        return read_err;
    };

    if (fs.watcher) |*watcher| {
        fs.watcher_mutex.lock();
        defer fs.watcher_mutex.unlock();
        var slot = watcher.getOrPutValue(path, WatchData{}) catch unreachable;
        slot.value.state = .file_need_mod_key;
        slot.value.file_contents = file_contents;
    }

    return File{ .path = Path.init(path), .contents = file_contents };
}
// Determines whether `base` inside `_dir` is a file or a directory, following
// symlinks, and returns the result as an Entry.Cache.
//
// Errors from opening or stat-ing the entry itself, and from readlink, are
// returned. If a symlink target cannot be opened or stat-ed, or the chain does
// not end within 255 links, the default cache (kind = file, no symlink) is
// returned. When a symlink was followed, `symlink` in the result is a copy
// allocated from `fs.allocator`.
pub fn kind(fs: *RealFS, _dir: string, base: string) !Entry.Cache {
    var dir = _dir;
    var combo = [2]string{ dir, base };
    var entry_path = path_handler.normalizeAndJoinString(fs.cwd, &combo, .auto);

    fs.limiter.before();
    defer fs.limiter.after();

    const file = try std.fs.openFileAbsolute(entry_path, .{ .read = true, .write = false });
    defer file.close();
    var stat = try file.stat();
    var _kind = stat.kind;
    var cache = Entry.Cache{ .kind = Entry.Kind.file, .symlink = "" };
    var symlink: []const u8 = "";

    if (_kind == .SymLink) {
        // windows has a max filepath of 255 chars
        // we give it a little longer for other platforms
        // NOTE(review): from the second iteration on, `symlink`, `link` and
        // `dir` all point into this same buffer while it is being rewritten;
        // confirm the helpers tolerate that aliasing.
        var out_buffer = std.mem.zeroes([512]u8);
        var out_slice = &out_buffer;
        symlink = entry_path;
        var links_walked: u8 = 0;
        while (links_walked < 255) : (links_walked += 1) {
            var link: string = try std.os.readlink(symlink, out_slice);
            if (!std.fs.path.isAbsolute(link)) {
                combo[0] = dir;
                combo[1] = link;
                link = path_handler.normalizeAndJoinStringBuf(fs.cwd, out_slice, &combo, .auto);
            }
            // TODO: do we need to clean the path?
            symlink = link;
            const file2 = std.fs.openFileAbsolute(symlink, std.fs.File.OpenFlags{ .read = true, .write = false }) catch return cache;
            defer file2.close();
            const stat2 = file2.stat() catch return cache;
            // Re-run "lstat" on the symlink target
            _kind = stat2.kind;
            if (_kind != .SymLink) {
                break;
            }
            dir = std.fs.path.dirname(link) orelse return cache;
        }
        // The loop leaves the counter at 255 only when it ran out of
        // iterations without reaching a non-symlink (a `break` exits earlier).
        // The previous `> 255` test could never be true for a u8.
        if (links_walked >= 255) {
            return cache;
        }
    }

    cache.kind = if (_kind == .Directory) Entry.Kind.dir else Entry.Kind.file;
    if (symlink.len > 0) {
        cache.symlink = try fs.allocator.dupe(u8, symlink);
    }
    return cache;
}
// // Stores the file entries for directories we've listed before
// entries_mutex: std.Mutex
// entries map[string]entriesOrErr
// // If true, do not use the "entries" cache
// doNotCacheEntries bool
};
// Filesystem backend selected at compile time from `build_target`.
// Written as a switch expression: a container-level `comptime` block is not a
// function body, so `return` has nothing to return from.
pub const Implementation = switch (build_target) {
    .wasi, .native => RealFS,
    .wasm => WasmFS,
};
};
// Result of looking something up on the filesystem: a file, a directory, or
// a not-found marker.
// NOTE(review): `FileNotFound` is not declared in the visible part of this
// file; presumably it comes from global.zig — confirm.
pub const FileSystemEntry = union(FileSystemEntry.Kind) {
file: File,
directory: Directory,
not_found: FileNotFound,
pub const Kind = enum(u8) {
file,
directory,
not_found,
};
};
// A directory path together with a list of strings describing its contents.
pub const Directory = struct { path: Path, contents: []string };
// A file path together with its contents; RealFS.readFile allocates `contents`
// from the RealFS allocator.
pub const File = struct { path: Path, contents: string };
// A path split into directory, base name (without extension) and extension
// (including the leading dot). All three are slices of the original path.
pub const PathName = struct {
    base: string,
    dir: string,
    ext: string,

    // For readability, the names of certain automatically-generated symbols are
    // derived from the file name. For example, instead of the CommonJS wrapper for
    // a file being called something like "require273" it can be called something
    // like "require_react" instead. This function generates the part of these
    // identifiers that's specific to the file path. It can take both an absolute
    // path (OS-specific) and a path in the source code (OS-independent).
    //
    // Note that these generated names do not at all relate to the correctness of
    // the code as far as avoiding symbol name collisions. These names still go
    // through the renaming logic that all other symbols go through to avoid name
    // collisions.
    //
    // For a file whose base name is "index", the name of the containing
    // directory is used instead (for example "components/index.js" yields
    // "components"). The previous code took the parent's `.dir`, i.e. the
    // grandparent path, which only gave the right answer for single-segment
    // directories.
    pub fn nonUniqueNameString(self: *PathName, allocator: *std.mem.Allocator) !string {
        if (strings.eqlComptime(self.base, "index")) {
            if (self.dir.len > 0) {
                const parent = PathName.init(self.dir);
                if (parent.base.len > 0) {
                    return MutableString.ensureValidIdentifier(parent.base, allocator);
                }
            }
        }
        return MutableString.ensureValidIdentifier(self.base, allocator);
    }

    // Splits `_path` on its last non-trailing '/' and on the last '.' of the
    // base name.
    //
    // `dir` is empty when the path contains no non-trailing slash and `ext` is
    // empty when the base name has no dot; previously both defaulted to the
    // whole path.
    pub fn init(_path: string) PathName {
        var path = _path;
        var base = path;
        var ext: string = "";
        var dir: string = "";

        var _i = strings.lastIndexOfChar(path, '/');
        while (_i) |i| {
            // Stop if we found a non-trailing slash
            if (i + 1 != path.len) {
                base = path[i + 1 ..];
                dir = path[0..i];
                break;
            }
            // Ignore trailing slashes
            path = path[0..i];
            _i = strings.lastIndexOfChar(path, '/');
        }

        // Strip off the extension
        if (strings.lastIndexOfChar(base, '.')) |dot| {
            ext = base[dot..];
            base = base[0..dot];
        }

        return PathName{
            .dir = dir,
            .base = base,
            .ext = ext,
        };
    }
};
// Per-thread 1 KiB scratch buffers.
// NOTE(review): neither is referenced in the visible part of this file;
// confirm they are still needed.
threadlocal var normalize_buf: [1024]u8 = undefined;
threadlocal var join_buf: [1024]u8 = undefined;
// A path plus the namespace it belongs to and its parsed components.
pub const Path = struct {
    // Display form; the constructors here set it equal to `text`.
    pretty: string,
    text: string,
    namespace: string = "unspecified",
    name: PathName,
    is_disabled: bool = false,

    // Returns "<namespace>://<text>" allocated with `allocator`; caller owns it.
    pub fn generateKey(p: *Path, allocator: *std.mem.Allocator) !string {
        return try std.fmt.allocPrint(allocator, "{s}://{s}", .{ p.namespace, p.text });
    }

    // Builds a Path in the "file" namespace.
    pub fn init(text: string) Path {
        return Path{ .pretty = text, .text = text, .namespace = "file", .name = PathName.init(text) };
    }

    // Builds a Path in the given namespace.
    pub fn initWithNamespace(text: string, namespace: string) Path {
        return Path{ .pretty = text, .text = text, .namespace = namespace, .name = PathName.init(text) };
    }

    // Strict ordering used for sorting paths: namespaces in descending byte
    // order, then text in ascending byte order, then enabled before disabled.
    //
    // The previous body used `||` (which merges error sets rather than
    // computing a boolean OR), compared slices with `<`/`==`, and read a
    // `flags` field that Path does not have.
    // NOTE(review): `is_disabled` stands in for the missing `flags`
    // tie-breaker — confirm this is the intended final key.
    pub fn isBefore(a: *Path, b: Path) bool {
        switch (std.mem.order(u8, a.namespace, b.namespace)) {
            .gt => return true,
            .lt => return false,
            .eq => {},
        }
        switch (std.mem.order(u8, a.text, b.text)) {
            .lt => return true,
            .gt => return false,
            .eq => {},
        }
        return !a.is_disabled and b.is_disabled;
    }
};
// Splitting an absolute path with an extension yields dir / base / ext.
test "PathName.init" {
    var file = "/root/directory/file.ext".*;
    const res = PathName.init(
        &file,
    );
    // expectEqualStrings takes (expected, actual); the arguments were swapped,
    // which mislabels the two values in failure output.
    try std.testing.expectEqualStrings("/root/directory", res.dir);
    try std.testing.expectEqualStrings("file", res.base);
    try std.testing.expectEqualStrings(".ext", res.ext);
}
// Anonymous test with an empty body.
test {}