fix: handle DT_UNKNOWN in dir_iterator for bind-mounted filesystems (#25838)

### What does this PR do?
Fixes #24007
Possibly fixes https://github.com/oven-sh/bun/issues/18902,
https://github.com/oven-sh/bun/issues/7412

Some filesystems (bind mounts, FUSE, NFS) don't provide `d_type` in
directory entries, returning `DT_UNKNOWN`. This caused glob and
recursive readdir to skip entries entirely.

## Problem
On Linux filesystems that don't populate `d_type` in directory entries
(bind mounts, FUSE, NFS, some ext4 configurations), `readdir()` returns
`DT_UNKNOWN` instead of the actual file type. This caused:
- `Bun.Glob` to skip files/directories entirely
- `fs.readdirSync(..., {recursive: true})` to not recurse into
subdirectories
- `fs.readdirSync(..., {withFileTypes: true})` to report incorrect types

## Solution
Implemented a **lazy `lstatat()` fallback** when `d_type == DT_UNKNOWN`:

- **`sys.zig`**: Added `lstatat()` function - same as `fstatat()` but
with `AT_SYMLINK_NOFOLLOW` flag to correctly identify symlinks
- **`GlobWalker.zig`**: When encountering `.unknown` entries, first
check if filename matches pattern, then call `lstatat()` only if needed
- **`node_fs.zig`**: Handle `.unknown` in both async and sync recursive
readdir paths; propagate resolved kind to Dirent objects
- **`dir_iterator.zig`**: Return `.unknown` for `DT_UNKNOWN` entries,
letting callers handle lazy stat

**Why `lstatat` instead of `fstatat`?** We use `AT_SYMLINK_NOFOLLOW` to
preserve consistent behavior with normal filesystems - symlinks should
be reported as symlinks, not as their target type. This matches [Node.js
behavior](https://github.com/nodejs/node/blob/main/lib/internal/fs/utils.js#L251-L269)
which uses `lstat()` for the DT_UNKNOWN fallback, and follows the lazy
stat pattern established in PR #18172.

### How did you verify your code works?

**Testing:**
- Regression test: `test/regression/issue/24007.test.ts`
- FUSE filesystem test: `test/cli/run/glob-on-fuse.test.ts` (reuses
`fuse-fs.py` from PR #18172, includes symlink verification)
- All existing glob/readdir tests pass
- **Verified in Docker bind-mount environment:**
  - Official Bun: `0 files`
  - Patched Bun: `3 files`

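**Performance:** No impact on normal filesystems - the `.unknown` branch
is only hit when `d_type == DT_UNKNOWN`. On affected filesystems,
`GlobWalker.zig` avoids unnecessary syscalls by checking the pattern match
before calling `lstatat()`; the readdir paths in `node_fs.zig` issue one
`lstatat()` per `.unknown` entry.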

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
mmitchellg5
2026-01-22 13:44:49 -08:00
committed by GitHub
parent 2a9980076d
commit 85080f7949
6 changed files with 421 additions and 6 deletions

View File

@@ -162,7 +162,7 @@ pub fn NewIterator(comptime use_windows_ospath: bool) type {
continue :start_over;
}
const entry_kind = switch (linux_entry.type) {
const entry_kind: Entry.Kind = switch (linux_entry.type) {
linux.DT.BLK => Entry.Kind.block_device,
linux.DT.CHR => Entry.Kind.character_device,
linux.DT.DIR => Entry.Kind.directory,
@@ -170,6 +170,9 @@ pub fn NewIterator(comptime use_windows_ospath: bool) type {
linux.DT.LNK => Entry.Kind.sym_link,
linux.DT.REG => Entry.Kind.file,
linux.DT.SOCK => Entry.Kind.unix_domain_socket,
// DT_UNKNOWN: Some filesystems (e.g., bind mounts, FUSE, NFS)
// don't provide d_type. Callers should use lstatat() to determine
// the type when needed (lazy stat pattern for performance).
else => Entry.Kind.unknown,
};
return .{

View File

@@ -4499,10 +4499,19 @@ pub const NodeFS = struct {
switch (ExpectedType) {
jsc.Node.Dirent => {
dirent_path.ref();
// On filesystems that return DT_UNKNOWN (e.g. FUSE, bind mounts),
// fall back to lstat to determine the real file kind.
const kind = if (current.kind == .unknown)
switch (Syscall.lstatat(fd, current.name.sliceAssumeZ())) {
.result => |st| bun.sys.kindFromMode(@intCast(st.mode)),
.err => current.kind,
}
else
current.kind;
entries.append(.{
.name = jsc.WebCore.encoding.toBunString(utf8_name, args.encoding),
.path = dirent_path,
.kind = current.kind,
.kind = kind,
}) catch |err| bun.handleOom(err);
},
Buffer => {
@@ -4627,6 +4636,9 @@ pub const NodeFS = struct {
break :brk bun.path.joinZBuf(buf, &path_parts, .auto);
};
// Track effective kind - may be resolved from .unknown via stat
var effective_kind = current.kind;
enqueue: {
switch (current.kind) {
// a symlink might be a directory or might not be
@@ -4646,6 +4658,24 @@ pub const NodeFS = struct {
async_task.enqueue(name_to_copy);
},
// Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return
// DT_UNKNOWN for d_type. Use lstatat to determine the actual type.
.unknown => {
if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue;
// Lazy stat to determine the actual kind (lstatat to not follow symlinks)
const stat_result = bun.sys.lstatat(fd, current.name.sliceAssumeZ());
switch (stat_result) {
.result => |st| {
const real_kind = bun.sys.kindFromMode(@intCast(st.mode));
effective_kind = real_kind;
if (real_kind == .directory or real_kind == .sym_link) {
async_task.enqueue(name_to_copy);
}
},
.err => {}, // Skip entries we can't stat
}
},
else => {},
}
}
@@ -4662,7 +4692,7 @@ pub const NodeFS = struct {
entries.append(.{
.name = bun.String.cloneUTF8(utf8_name),
.path = dirent_path_prev,
.kind = current.kind,
.kind = effective_kind,
}) catch |err| bun.handleOom(err);
},
Buffer => {
@@ -4774,6 +4804,9 @@ pub const NodeFS = struct {
break :brk bun.path.joinZBuf(buf, &path_parts, .auto);
};
// Track effective kind - may be resolved from .unknown via stat
var effective_kind = current.kind;
enqueue: {
switch (current.kind) {
// a symlink might be a directory or might not be
@@ -4786,6 +4819,24 @@ pub const NodeFS = struct {
if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue;
stack.writeItem(basename_allocator.dupeZ(u8, name_to_copy) catch break :enqueue) catch break :enqueue;
},
// Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return
// DT_UNKNOWN for d_type. Use lstatat to determine the actual type.
.unknown => {
if (current.name.len + 1 + name_to_copy.len > bun.MAX_PATH_BYTES) break :enqueue;
// Lazy stat to determine the actual kind (lstatat to not follow symlinks)
const stat_result = bun.sys.lstatat(fd, current.name.sliceAssumeZ());
switch (stat_result) {
.result => |st| {
const real_kind = bun.sys.kindFromMode(@intCast(st.mode));
effective_kind = real_kind;
if (real_kind == .directory or real_kind == .sym_link) {
stack.writeItem(basename_allocator.dupeZ(u8, name_to_copy) catch break :enqueue) catch break :enqueue;
}
},
.err => {}, // Skip entries we can't stat
}
},
else => {},
}
}
@@ -4801,7 +4852,7 @@ pub const NodeFS = struct {
entries.append(.{
.name = jsc.WebCore.encoding.toBunString(utf8_name, args.encoding),
.path = dirent_path_prev,
.kind = current.kind,
.kind = effective_kind,
}) catch |err| bun.handleOom(err);
},
Buffer => {

View File

@@ -154,6 +154,12 @@ pub const SyscallAccessor = struct {
};
}
/// Stat `path` relative to `handle` without following symlinks.
///
/// On Windows this delegates to `statatWindows`; on every other platform it
/// forwards to `Syscall.lstatat`.
/// NOTE(review): whether `statatWindows` avoids following symlinks is not
/// visible from here - confirm if Windows callers rely on lstat semantics.
pub fn lstatat(handle: Handle, path: [:0]const u8) Maybe(bun.Stat) {
    // The condition is comptime-known, so only one branch is compiled per target.
    return if (comptime bun.Environment.isWindows)
        statatWindows(handle.value, path)
    else
        Syscall.lstatat(handle.value, path);
}
pub fn openat(handle: Handle, path: [:0]const u8) !Maybe(Handle) {
return switch (Syscall.openat(handle.value, path, bun.O.DIRECTORY | bun.O.RDONLY, 0)) {
.err => |err| .{ .err = err },
@@ -247,6 +253,24 @@ pub const DirEntryAccessor = struct {
return Syscall.stat(path);
}
/// Like statat but does not follow symlinks.
///
/// Resolution is path-based: a relative `path_` is joined onto the directory
/// recorded in the handle's entry (`entry.dir`) when the handle is non-empty,
/// and the resulting path is passed to `Syscall.lstat`. Absolute paths, and
/// relative paths with an empty handle, are passed through unchanged.
pub fn lstatat(handle: Handle, path_: [:0]const u8) Maybe(bun.Stat) {
    var path: [:0]const u8 = path_;
    var buf: bun.PathBuffer = undefined;
    // No fd-based early return here: returning on a non-null handle would make
    // the join below unreachable and skip resolving against `entry.dir`.
    if (!bun.path.Platform.auto.isAbsolute(path)) {
        if (handle.value) |entry| {
            // Join into `buf` and NUL-terminate it so the sentinel-terminated
            // slice can be handed to the syscall wrapper.
            const slice = bun.path.joinStringBuf(&buf, [_][]const u8{ entry.dir, path }, .auto);
            buf[slice.len] = 0;
            path = buf[0..slice.len :0];
        }
    }
    return Syscall.lstat(path);
}
pub fn open(path: [:0]const u8) !Maybe(Handle) {
return openat(.empty, path);
}
@@ -902,6 +926,93 @@ pub fn GlobWalker_(
continue;
},
// Some filesystems (e.g., Docker bind mounts, FUSE, NFS) return
// DT_UNKNOWN for d_type. Use lazy stat to determine the real kind
// only when needed (PR #18172 pattern for performance).
.unknown => {
// First check if name might match pattern (avoid unnecessary stat)
const might_match = this.walker.matchPatternImpl(dir_iter_state.pattern, entry_name);
if (!might_match) continue;
// Need to stat to determine actual kind (lstatat to not follow symlinks)
// Use stack fallback for short names (typical case) to avoid arena allocation
const stackbuf_size = 256;
var stfb = std.heap.stackFallback(stackbuf_size, this.walker.arena.allocator());
const name_z = bun.handleOom(stfb.get().dupeZ(u8, entry_name));
const stat_result = Accessor.lstatat(dir.fd, name_z);
const real_kind = switch (stat_result) {
.result => |st| bun.sys.kindFromMode(@intCast(st.mode)),
.err => continue, // Skip entries we can't stat
};
// Process based on actual kind
switch (real_kind) {
.file => {
const matches = this.walker.matchPatternFile(entry_name, dir_iter_state.component_idx, dir.is_last, dir_iter_state.pattern, dir_iter_state.next_pattern);
if (matches) {
const prepared = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue;
return .{ .result = prepared };
}
},
.directory => {
var add_dir: bool = false;
const recursion_idx_bump_ = this.walker.matchPatternDir(dir_iter_state.pattern, dir_iter_state.next_pattern, entry_name, dir_iter_state.component_idx, dir_iter_state.is_last, &add_dir);
if (recursion_idx_bump_) |recursion_idx_bump| {
const subdir_parts: []const []const u8 = &[_][]const u8{
dir.dir_path[0..dir.dir_path.len],
entry_name,
};
const subdir_entry_name = try this.walker.join(subdir_parts);
if (recursion_idx_bump == 2) {
try this.walker.workbuf.append(
this.walker.arena.allocator(),
WorkItem.new(subdir_entry_name, dir_iter_state.component_idx + recursion_idx_bump, .directory),
);
try this.walker.workbuf.append(
this.walker.arena.allocator(),
WorkItem.new(subdir_entry_name, dir_iter_state.component_idx, .directory),
);
} else {
try this.walker.workbuf.append(
this.walker.arena.allocator(),
WorkItem.new(subdir_entry_name, dir_iter_state.component_idx + recursion_idx_bump, .directory),
);
}
}
if (add_dir and !this.walker.only_files) {
const prepared_path = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue;
return .{ .result = prepared_path };
}
},
.sym_link => {
if (this.walker.follow_symlinks) {
const subdir_parts: []const []const u8 = &[_][]const u8{
dir.dir_path[0..dir.dir_path.len],
entry_name,
};
const entry_start: u32 = @intCast(if (dir.dir_path.len == 0) 0 else dir.dir_path.len + 1);
const subdir_entry_name = try this.walker.join(subdir_parts);
try this.walker.workbuf.append(
this.walker.arena.allocator(),
WorkItem.newSymlink(subdir_entry_name, dir_iter_state.component_idx, entry_start),
);
} else if (!this.walker.only_files) {
const matches = this.walker.matchPatternFile(entry_name, dir_iter_state.component_idx, dir_iter_state.is_last, dir_iter_state.pattern, dir_iter_state.next_pattern);
if (matches) {
const prepared_path = try this.walker.prepareMatchedPath(entry_name, dir.dir_path) orelse continue;
return .{ .result = prepared_path };
}
}
},
else => {}, // Skip other types (block devices, etc.)
}
continue;
},
else => continue,
}
},

View File

@@ -744,10 +744,11 @@ pub fn fstatat(fd: bun.FileDescriptor, path: [:0]const u8) Maybe(bun.Stat) {
return Maybe(bun.Stat){ .result = stat_buf };
}
/// Like fstatat but does not follow symlinks (uses AT_SYMLINK_NOFOLLOW)
/// Like fstatat but does not follow symlinks (uses AT.SYMLINK_NOFOLLOW).
/// This is the "at" equivalent of lstat.
pub fn lstatat(fd: bun.FileDescriptor, path: [:0]const u8) Maybe(bun.Stat) {
if (Environment.isWindows) {
// On Windows, use O.NOFOLLOW to get lstat behavior (prevents following symlinks)
// Use O.NOFOLLOW to not follow symlinks (FILE_OPEN_REPARSE_POINT on Windows)
return switch (openatWindowsA(fd, path, O.NOFOLLOW, 0)) {
.result => |file| {
defer file.close();

View File

@@ -0,0 +1,121 @@
/**
* Test that Bun.Glob and fs.globSync work correctly on FUSE filesystems
* where d_type returns DT_UNKNOWN.
*
* Related to issue #24007 and PR #18172
*/
import { spawn, type ReadableSubprocess } from "bun";
import { describe, expect, test } from "bun:test";
import { isLinux, tmpdirSync } from "harness";
import fs from "node:fs";
import { join } from "node:path";
describe.skipIf(!isLinux)("glob on a FUSE mount", () => {
/**
 * Mounts the FUSE filesystem served by `fuse-fs.py` on a fresh temporary
 * directory, runs `fn` against the mountpoint, then unmounts and waits for the
 * Python process to exit.
 *
 * A failure from the setup or from `fn` takes precedence over a failure during
 * teardown; whichever applies is re-thrown only after teardown has finished.
 *
 * @param fn Callback invoked with the mountpoint path once `main.js` is visible.
 * @returns The value `fn` resolved with.
 */
async function withFuseMount<T>(fn: (mountpoint: string) => Promise<T>): Promise<T> {
  // Normalizes any thrown value into an Error instance.
  const asError = (thrown: unknown): Error => (thrown instanceof Error ? thrown : new Error(String(thrown)));

  // tmpdirSync gives an empty directory to mount on (tempDir requires a file tree).
  const mountpoint = tmpdirSync();
  const readyMarker = join(mountpoint, "main.js");

  let fuseProc: ReadableSubprocess | undefined = undefined;
  let outcome: T;
  let bodyFailure: Error | undefined;
  let teardownFailure: Error | undefined;

  try {
    // Start the FUSE filesystem (fuse-fs.py, which returns DT_UNKNOWN).
    fuseProc = spawn({
      cmd: ["python3", "fuse-fs.py", "-f", mountpoint],
      cwd: __dirname,
      stdout: "pipe",
      stderr: "pipe",
    });

    // Poll until the mount is ready, giving up after 250 attempts or as soon
    // as the Python process has exited.
    let attempts = 0;
    while (true) {
      if (fs.existsSync(readyMarker)) break;
      if (attempts >= 250) break;
      if (fuseProc.exitCode !== null) break;
      attempts += 1;
      await Bun.sleep(5);
    }

    const earlyExit = fuseProc.exitCode;
    if (earlyExit !== null && earlyExit !== 0) {
      throw new Error(`FUSE process exited early with code ${earlyExit}`);
    }
    expect(fs.existsSync(readyMarker)).toBeTrue();

    outcome = await fn(mountpoint);
  } catch (thrown) {
    bodyFailure = asError(thrown);
  } finally {
    if (fuseProc) {
      try {
        // Unmount, then give the process up to a second to exit gracefully.
        const unmounter = spawn({ cmd: ["fusermount", "-u", mountpoint] });
        await unmounter.exited;
        await Promise.race([fuseProc.exited, Bun.sleep(1000)]);
        expect(fuseProc.exitCode).toBe(0);
      } catch (thrown) {
        fuseProc.kill("SIGKILL");
        console.error("python process errored:", await new Response(fuseProc.stderr).text());
        // Record the teardown failure instead of throwing inside finally; it
        // is only kept when the body itself did not fail.
        if (!bodyFailure) {
          teardownFailure = asError(thrown);
        }
      }
    }
  }

  // Surface errors outside the finally block.
  if (bodyFailure) throw bodyFailure;
  if (teardownFailure) throw teardownFailure;
  return outcome!;
}
// Each test gets a generous timeout so it can tear down its own filesystem
// mount instead of being interrupted mid-cleanup (matches run-file-on-fuse.test.ts).
test("Bun.Glob.scanSync finds files on FUSE mount", async () => {
  await withFuseMount(async mount => {
    const scanner = new Bun.Glob("*.js");
    const matched = Array.from(scanner.scanSync({ cwd: mount }));
    // fuse-fs.py provides main.js and main-symlink.js
    expect(matched).toContain("main.js");
    expect(matched.length).toBeGreaterThanOrEqual(1);
  });
}, 10000);

test("fs.globSync finds files on FUSE mount", async () => {
  await withFuseMount(async mount => {
    const matched = fs.globSync("*.js", { cwd: mount });
    expect(matched).toContain("main.js");
    expect(matched.length).toBeGreaterThanOrEqual(1);
  });
}, 10000);

test("fs.readdirSync works on FUSE mount", async () => {
  await withFuseMount(async mount => {
    const names = fs.readdirSync(mount);
    for (const expectedName of ["main.js", "main-symlink.js"]) {
      expect(names).toContain(expectedName);
    }
  });
}, 10000);

test("fs.readdirSync with withFileTypes returns correct types on FUSE mount", async () => {
  await withFuseMount(async mount => {
    const dirents = fs.readdirSync(mount, { withFileTypes: true });

    // The regular file must be reported as a file...
    const regular = dirents.find(entry => entry.name === "main.js");
    expect(regular).toBeDefined();
    expect(regular!.isFile()).toBe(true);

    // ...and the symlink as a symlink, not as its target's type.
    const link = dirents.find(entry => entry.name === "main-symlink.js");
    expect(link).toBeDefined();
    expect(link!.isSymbolicLink()).toBe(true);
  });
}, 10000);
});

View File

@@ -0,0 +1,128 @@
/**
* Regression test for GitHub issue #24007
* https://github.com/oven-sh/bun/issues/24007
*
* Issue: Bun's glob/readdir functionality failed on bind-mounted paths in Docker
* because certain filesystems (sshfs, fuse, NFS, bind mounts) don't provide d_type
* information in directory entries (returns DT_UNKNOWN).
*
* Fix: Added lstatat() fallback when d_type is unknown, following the lazy stat
* pattern from PR #18172.
*
* See also: test/cli/run/glob-on-fuse.test.ts for FUSE filesystem testing.
*/
import { describe, expect, test } from "bun:test";
import { tempDir } from "harness";
import fs from "node:fs";
import path from "node:path";
describe.concurrent("issue #24007 - glob with recursive patterns", () => {
  // Builds an OS-specific relative path from its segments.
  const rel = (...segments: string[]): string => path.join(...segments);

  test("recursive glob pattern **/*.ts finds nested files", () => {
    using fixture = tempDir("issue-24007", {
      "server/api/health.get.ts": "export default () => 'ok';",
      "server/api/users/list.ts": "export default () => [];",
      "server/routes/index.ts": "export default {};",
      "server/routes/admin/dashboard.ts": "export default {};",
      "config.ts": "export default {};",
    });

    // Recursive pattern with **
    const found = fs.globSync("**/*.ts", { cwd: String(fixture) });
    const wanted = [
      "config.ts",
      rel("server", "api", "health.get.ts"),
      rel("server", "api", "users", "list.ts"),
      rel("server", "routes", "index.ts"),
      rel("server", "routes", "admin", "dashboard.ts"),
    ];
    for (const entry of wanted) {
      expect(found).toContain(entry);
    }
    expect(found.length).toBe(5);
  });

  test("recursive glob pattern server/**/*.ts finds files in subdirectory", () => {
    using fixture = tempDir("issue-24007-subdir", {
      "server/api/health.get.ts": "x",
      "server/routes/status.ts": "x",
      "other/file.ts": "x",
    });

    const found = fs.globSync("server/**/*.ts", { cwd: String(fixture) });
    for (const entry of [rel("server", "api", "health.get.ts"), rel("server", "routes", "status.ts")]) {
      expect(found).toContain(entry);
    }
    expect(found).not.toContain(rel("other", "file.ts"));
    expect(found.length).toBe(2);
  });

  test("top-level glob pattern server/*.ts finds direct children", () => {
    using fixture = tempDir("issue-24007-toplevel", {
      "server/index.ts": "x",
      "server/config.ts": "x",
      "server/nested/deep.ts": "x",
    });

    const found = fs.globSync("server/*.ts", { cwd: String(fixture) });
    for (const entry of [rel("server", "index.ts"), rel("server", "config.ts")]) {
      expect(found).toContain(entry);
    }
    expect(found).not.toContain(rel("server", "nested", "deep.ts"));
    expect(found.length).toBe(2);
  });

  test("Bun.Glob recursive scan finds nested files", () => {
    using fixture = tempDir("issue-24007-bun-glob", {
      "api/health.get.ts": "x",
      "api/users/index.ts": "x",
      "routes/home.ts": "x",
    });

    const scanner = new Bun.Glob("**/*.ts");
    const found = Array.from(scanner.scanSync({ cwd: String(fixture) }));
    const wanted = [rel("api", "health.get.ts"), rel("api", "users", "index.ts"), rel("routes", "home.ts")];
    for (const entry of wanted) {
      expect(found).toContain(entry);
    }
    expect(found.length).toBe(3);
  });

  test("fs.readdirSync with recursive option finds all files", () => {
    using fixture = tempDir("issue-24007-readdir", {
      "a/b/c/file.txt": "content",
      "a/b/file.txt": "content",
      "a/file.txt": "content",
      "file.txt": "content",
    });

    const listing = fs.readdirSync(String(fixture), { recursive: true });
    const wanted = ["file.txt", rel("a", "file.txt"), rel("a", "b", "file.txt"), rel("a", "b", "c", "file.txt")];
    for (const entry of wanted) {
      expect(listing).toContain(entry);
    }
  });

  test("fs.readdirSync with recursive and withFileTypes returns correct types", () => {
    using fixture = tempDir("issue-24007-dirent", {
      "dir/subdir/file.txt": "content",
      "dir/another.txt": "content",
    });
    const root = String(fixture);

    const dirents = fs.readdirSync(root, { recursive: true, withFileTypes: true });

    // The file nested in dir/subdir/ must be reported as a regular file.
    const nestedParent = path.join(root, "dir", "subdir");
    const nested = dirents.find(entry => entry.name === "file.txt" && entry.parentPath === nestedParent);
    expect(nested).toBeDefined();
    expect(nested!.isFile()).toBe(true);

    // The intermediate directory must be reported as a directory.
    const subdir = dirents.find(entry => entry.name === "subdir");
    expect(subdir).toBeDefined();
    expect(subdir!.isDirectory()).toBe(true);
  });
});