Compare commits

...

2 Commits

Author SHA1 Message Date
Dylan Conway
825ff9c293 Merge branch 'main' into dylan/glob-codepoint-iterator 2024-10-09 23:58:36 -07:00
Dylan Conway
90f091cc4b first part 2024-10-07 01:48:09 -07:00
6 changed files with 667 additions and 415 deletions

View File

@@ -17,18 +17,20 @@ const JSValue = @import("../bindings/bindings.zig").JSValue;
const ZigString = @import("../bindings/bindings.zig").ZigString;
const Base = @import("../base.zig");
const JSGlobalObject = @import("../bindings/bindings.zig").JSGlobalObject;
const getAllocator = Base.getAllocator;
const ResolvePath = @import("../../resolver/resolve_path.zig");
const isAllAscii = @import("../../string_immutable.zig").isAllASCII;
const CodepointIterator = @import("../../string_immutable.zig").UnsignedCodepointIterator;
const String = bun.String;
const Arena = std.heap.ArenaAllocator;
pub usingnamespace JSC.Codegen.JSGlob;
pub usingnamespace bun.New(Glob);
pattern: []const u8,
pattern_codepoints: ?std.ArrayList(u32) = null,
is_ascii: bool,
// TODO(dylan-conway): delete
pattern_utf8: ?ZigString.Slice,
pattern: String,
has_pending_activity: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),
const ScanOpts = struct {
@@ -242,9 +244,9 @@ fn globWalkResultToJS(globWalk: *GlobWalker, globalThis: *JSGlobalObject) JSValu
/// by `GlobWalker.init`/`GlobWalker.initWithCwd` if all allocations work and no
/// errors occur
fn makeGlobWalker(
this: *Glob,
globalThis: *JSGlobalObject,
arguments: *ArgumentsSlice,
pattern: []const u8,
comptime fnName: []const u8,
alloc: Allocator,
arena: *Arena,
@@ -267,7 +269,7 @@ fn makeGlobWalker(
switch (globWalker.initWithCwd(
arena,
this.pattern,
pattern,
cwd.?,
dot,
absolute,
@@ -294,7 +296,7 @@ fn makeGlobWalker(
globWalker.* = .{};
switch (globWalker.init(
arena,
this.pattern,
pattern,
dot,
absolute,
follow_symlinks,
@@ -318,8 +320,6 @@ pub fn constructor(
globalThis: *JSC.JSGlobalObject,
callframe: *JSC.CallFrame,
) ?*Glob {
const alloc = getAllocator(globalThis);
const arguments_ = callframe.arguments(1);
var arguments = JSC.Node.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
defer arguments.deinit();
@@ -333,40 +333,22 @@ pub fn constructor(
return null;
}
const pat_str: []u8 = @constCast((pat_arg.toSliceClone(globalThis) orelse return null).slice());
const pattern_str = bun.String.fromJSRef(pat_arg, globalThis);
const all_ascii = isAllAscii(pat_str);
var glob = alloc.create(Glob) catch bun.outOfMemory();
glob.* = .{ .pattern = pat_str, .is_ascii = all_ascii };
if (!all_ascii) {
var codepoints = std.ArrayList(u32).initCapacity(alloc, glob.pattern.len * 2) catch {
globalThis.throwOutOfMemory();
return null;
};
errdefer codepoints.deinit();
convertUtf8(&codepoints, glob.pattern) catch {
globalThis.throwOutOfMemory();
return null;
};
glob.pattern_codepoints = codepoints;
}
return glob;
return Glob.new(.{
.pattern = pattern_str,
.pattern_utf8 = null,
});
}
pub fn finalize(
this: *Glob,
) callconv(.C) void {
const alloc = JSC.VirtualMachine.get().allocator;
alloc.free(this.pattern);
if (this.pattern_codepoints) |*codepoints| {
codepoints.deinit();
this.pattern.deref();
if (this.pattern_utf8) |utf8| {
utf8.deinit();
}
alloc.destroy(this);
this.destroy();
}
pub fn hasPendingActivity(this: *Glob) callconv(.C) bool {
@@ -385,20 +367,23 @@ fn decrPendingActivityFlag(has_pending_activity: *std.atomic.Value(usize)) void
}
pub fn __scan(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.CallFrame) JSC.JSValue {
const alloc = getAllocator(globalThis);
const arguments_ = callframe.arguments(1);
var arguments = JSC.Node.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
defer arguments.deinit();
var arena = std.heap.ArenaAllocator.init(alloc);
const globWalker = this.makeGlobWalker(globalThis, &arguments, "scan", alloc, &arena) orelse {
const pattern = this.pattern_utf8 orelse pattern: {
this.pattern_utf8 = this.pattern.toUTF8(bun.default_allocator);
break :pattern this.pattern_utf8.?;
};
var arena = std.heap.ArenaAllocator.init(bun.default_allocator);
const globWalker = makeGlobWalker(globalThis, &arguments, pattern.slice(), "scan", bun.default_allocator, &arena) orelse {
arena.deinit();
return .undefined;
};
incrPendingActivityFlag(&this.has_pending_activity);
var task = WalkTask.create(globalThis, alloc, globWalker, &this.has_pending_activity) catch {
var task = WalkTask.create(globalThis, bun.default_allocator, globWalker, &this.has_pending_activity) catch {
decrPendingActivityFlag(&this.has_pending_activity);
globalThis.throwOutOfMemory();
return .undefined;
@@ -409,14 +394,17 @@ pub fn __scan(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.CallFram
}
pub fn __scanSync(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.CallFrame) JSC.JSValue {
const alloc = getAllocator(globalThis);
const arguments_ = callframe.arguments(1);
var arguments = JSC.Node.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
defer arguments.deinit();
var arena = std.heap.ArenaAllocator.init(alloc);
var globWalker = this.makeGlobWalker(globalThis, &arguments, "scanSync", alloc, &arena) orelse {
const pattern = this.pattern_utf8 orelse pattern: {
this.pattern_utf8 = this.pattern.toUTF8(bun.default_allocator);
break :pattern this.pattern_utf8.?;
};
var arena = std.heap.ArenaAllocator.init(bun.default_allocator);
var globWalker = makeGlobWalker(globalThis, &arguments, pattern.slice(), "scanSync", bun.default_allocator, &arena) orelse {
arena.deinit();
return .undefined;
};
@@ -439,10 +427,6 @@ pub fn __scanSync(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.Call
}
pub fn match(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.CallFrame) JSC.JSValue {
const alloc = getAllocator(globalThis);
var arena = Arena.init(alloc);
defer arena.deinit();
const arguments_ = callframe.arguments(1);
var arguments = JSC.Node.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
defer arguments.deinit();
@@ -456,37 +440,18 @@ pub fn match(this: *Glob, globalThis: *JSGlobalObject, callframe: *JSC.CallFrame
return .undefined;
}
var str = str_arg.toSlice(globalThis, arena.allocator());
defer str.deinit();
const path_str = String.fromJSRef(str_arg, globalThis);
defer path_str.deref();
if (this.is_ascii and isAllAscii(str.slice())) return JSC.JSValue.jsBoolean(globImplAscii.match(this.pattern, str.slice()));
const codepoints = codepoints: {
if (this.pattern_codepoints) |cp| break :codepoints cp.items[0..];
var codepoints = std.ArrayList(u32).initCapacity(alloc, this.pattern.len * 2) catch {
globalThis.throwOutOfMemory();
return .undefined;
};
errdefer codepoints.deinit();
convertUtf8(&codepoints, this.pattern) catch {
globalThis.throwOutOfMemory();
return .undefined;
};
this.pattern_codepoints = codepoints;
break :codepoints codepoints.items[0..codepoints.items.len];
};
return if (globImpl.matchImpl(codepoints, str.slice()).matches()) .true else .false;
}
pub fn convertUtf8(codepoints: *std.ArrayList(u32), pattern: []const u8) !void {
const iter = CodepointIterator.init(pattern);
var cursor = CodepointIterator.Cursor{};
while (iter.next(&cursor)) {
try codepoints.append(@intCast(cursor.c));
if (path_str.isUTF16()) {
if (this.pattern.isUTF16()) {
return if (globImpl.match(.utf16, this.pattern.utf16(), .utf16, path_str.utf16()).matches()) .true else .false;
}
return if (globImpl.match(.latin1, this.pattern.latin1(), .utf16, path_str.utf16()).matches()) .true else .false;
}
if (this.pattern.isUTF16()) {
return if (globImpl.match(.utf16, this.pattern.utf16(), .latin1, path_str.latin1()).matches()) .true else .false;
}
return if (globImpl.match(.latin1, this.pattern.latin1(), .latin1, path_str.latin1()).matches()) .true else .false;
}

View File

@@ -7,6 +7,7 @@ const Global = bun.Global;
const strings = bun.strings;
const JSON = bun.JSON;
const Glob = @import("../glob.zig");
const OOM = bun.OOM;
const Package = @import("../install/lockfile.zig").Package;
@@ -133,7 +134,7 @@ pub const FilterSet = struct {
}
const Pattern = struct {
codepoints: []u32,
bytes: []const u8,
kind: enum {
name,
path,
@@ -141,8 +142,8 @@ pub const FilterSet = struct {
// negate: bool = false,
};
pub fn init(allocator: std.mem.Allocator, filters: []const []const u8, cwd: []const u8) !FilterSet {
var buf: bun.PathBuffer = undefined;
pub fn init(allocator: std.mem.Allocator, filters: []const []const u8, cwd: []const u8) OOM!FilterSet {
var path_buf: bun.PathBuffer = undefined;
// TODO fixed buffer allocator with fallback?
var list = try std.ArrayList(Pattern).initCapacity(allocator, filters.len);
var self = FilterSet{ .allocator = allocator, .filters = &.{} };
@@ -156,20 +157,22 @@ pub const FilterSet = struct {
const is_path = filter_utf8.len > 0 and filter_utf8[0] == '.';
if (is_path) {
const parts = [_]string{filter_utf8};
filter_utf8 = bun.path.joinAbsStringBuf(cwd, &buf, &parts, .auto);
filter_utf8 = bun.path.joinAbsStringBuf(cwd, &path_buf, &parts, .auto);
}
var filter_utf32 = try std.ArrayListUnmanaged(u32).initCapacity(allocator, filter_utf8.len + 1);
var codepointer_iter = strings.UnsignedCodepointIterator.init(filter_utf8);
var cursor = strings.UnsignedCodepointIterator.Cursor{};
while (codepointer_iter.next(&cursor)) {
if (cursor.c == @as(u32, '\\')) {
try filter_utf32.append(self.allocator, cursor.c);
var bytes = try std.ArrayListUnmanaged(u8).initCapacity(allocator, filter_utf8.len + 1);
// TODO(dylan-conway): investigate if this is necessary
for (filter_utf8) |c| {
if (c == '\\') {
try bytes.append(allocator, c);
}
try filter_utf32.append(self.allocator, cursor.c);
try bytes.append(allocator, c);
}
self.has_name_filters = self.has_name_filters or !is_path;
try list.append(.{
.codepoints = filter_utf32.items,
.bytes = bytes.items,
.kind = if (is_path) .path else .name,
});
}
@@ -180,14 +183,14 @@ pub const FilterSet = struct {
pub fn deinit(self: *FilterSet) void {
for (self.filters) |filter| {
// TODO is this free correct? we're freeing only part of the array
self.allocator.free(filter.codepoints);
self.allocator.free(filter.bytes);
}
self.allocator.free(self.filters);
}
pub fn matchesPath(self: *const FilterSet, path: []const u8) bool {
for (self.filters) |filter| {
if (Glob.matchImpl(filter.codepoints, path).matches()) {
if (Glob.match(.utf8, filter.bytes, .utf8, path).matches()) {
return true;
}
}
@@ -200,7 +203,7 @@ pub const FilterSet = struct {
.name => name,
.path => path,
};
if (Glob.matchImpl(filter.codepoints, target).matches()) {
if (Glob.match(.utf8, filter.bytes, .utf8, target).matches()) {
return true;
}
}

View File

@@ -18,6 +18,7 @@ const FileSystem = bun.fs.FileSystem;
const path = bun.path;
const glob = bun.glob;
const Table = bun.fmt.Table;
const OOM = bun.OOM;
pub const OutdatedCommand = struct {
pub fn exec(ctx: Command.Context) !void {
@@ -120,10 +121,10 @@ pub const OutdatedCommand = struct {
// TODO: use in `bun pack, publish, run, ...`
const FilterType = union(enum) {
all,
name: []const u32,
path: []const u32,
name: []const u8,
path: []const u8,
pub fn init(pattern: []const u32, is_path: bool) @This() {
pub fn init(pattern: []const u8, is_path: bool) @This() {
return if (is_path) .{
.path = pattern,
} else .{
@@ -144,7 +145,7 @@ pub const OutdatedCommand = struct {
original_cwd: string,
manager: *PackageManager,
filters: []const string,
) error{OutOfMemory}![]const PackageID {
) OOM![]const PackageID {
const lockfile = manager.lockfile;
const packages = lockfile.packages.slice();
const pkg_names = packages.items(.name);
@@ -177,17 +178,7 @@ pub const OutdatedCommand = struct {
continue;
}
const length = bun.simdutf.length.utf32.from.utf8.le(joined_filter);
const convert_buf = try allocator.alloc(u32, length);
const convert_result = bun.simdutf.convert.utf8.to.utf32.with_errors.le(joined_filter, convert_buf);
if (!convert_result.isSuccessful()) {
// nothing would match
converted.* = FilterType.init(&.{}, false);
continue;
}
converted.* = FilterType.init(convert_buf[0..convert_result.count], is_path);
converted.* = FilterType.init(try allocator.dupe(u8, joined_filter), is_path);
}
break :converted_filters buf;
};
@@ -218,14 +209,15 @@ pub const OutdatedCommand = struct {
const abs_res_path = path.joinAbsString(FileSystem.instance.top_level_dir, &[_]string{res_path}, .posix);
if (!glob.matchImpl(pattern, strings.withoutTrailingSlash(abs_res_path)).matches()) {
if (!glob.match(.utf8, pattern, .utf8, strings.withoutTrailingSlash(abs_res_path)).matches()) {
break :matched false;
}
},
.name => |pattern| {
const name = pkg_names[workspace_pkg_id].slice(string_buf);
if (!glob.matchImpl(pattern, name).matches()) {
// TODO(dylan-conway): ascii name
if (!glob.match(.utf8, pattern, .latin1, name).matches()) {
break :matched false;
}
},
@@ -271,17 +263,8 @@ pub const OutdatedCommand = struct {
continue;
}
const length = bun.simdutf.length.utf32.from.utf8.le(arg);
const convert_buf = bun.default_allocator.alloc(u32, length) catch bun.outOfMemory();
const convert_result = bun.simdutf.convert.utf8.to.utf32.with_errors.le(arg, convert_buf);
if (!convert_result.isSuccessful()) {
converted.* = FilterType.init(&.{}, false);
continue;
}
converted.* = FilterType.init(convert_buf[0..convert_result.count], false);
at_least_one_greater_than_zero = at_least_one_greater_than_zero or convert_result.count > 0;
converted.* = FilterType.init(arg, false);
at_least_one_greater_than_zero = true;
}
// nothing will match
@@ -337,7 +320,8 @@ pub const OutdatedCommand = struct {
.path => unreachable,
.name => |name_pattern| {
if (name_pattern.len == 0) continue;
if (!glob.matchImpl(name_pattern, dep.name.slice(string_buf)).matches()) {
// TODO(dylan-conway): ascii dependency name
if (!glob.match(.utf8, name_pattern, .latin1, dep.name.slice(string_buf)).matches()) {
break :match false;
}
},

View File

@@ -224,7 +224,8 @@ pub const PackCommand = struct {
const package_prefix = "package/";
const root_default_ignore_patterns = [_][]const u32{
// TODO(dylan-conway): cleanup
const root_default_ignore_patterns = [_][]const u8{
&.{ 112, 97, 99, 107, 97, 103, 101, 45, 108, 111, 99, 107, 46, 106, 115, 111, 110 }, // package-lock.json
&.{ 121, 97, 114, 110, 46, 108, 111, 99, 107 }, // yarn.lock
&.{ 112, 110, 112, 109, 45, 108, 111, 99, 107, 46, 121, 97, 109, 108 }, // pnpm-lock.yaml
@@ -232,8 +233,9 @@ pub const PackCommand = struct {
&.{ 'b', 'u', 'n', '.', 'l', 'o', 'c', 'k' },
};
// TODO(dylan-conway): cleanup
// pattern, can override
const default_ignore_patterns = [_]struct { []const u32, bool }{
const default_ignore_patterns = [_]struct { []const u8, bool }{
.{ &.{ '.', '*', '.', 's', 'w', 'p' }, true },
.{ &.{ 46, 95, 42 }, true }, // "._*",
.{ &.{ 46, 68, 83, 95, 83, 116, 111, 114, 101 }, true }, // ".DS_Store",
@@ -341,7 +343,7 @@ pub const PackCommand = struct {
// normally the behavior of `index.js` and `**/index.js` are the same,
// but includes require `**/`
const match_path = if (include.@"leading **/") entry_name else entry_subpath;
switch (glob.matchImpl(include.glob, match_path)) {
switch (glob.match(.utf8, include.glob, .utf8, match_path)) {
.match => included = true,
.negate_no_match => included = false,
@@ -438,9 +440,9 @@ pub const PackCommand = struct {
if (isExcluded(entry, entry_subpath, dir_depth, ignores.items)) |used_pattern_info| {
if (comptime log_level.isVerbose()) {
const pattern, const kind = used_pattern_info;
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{}]<r> {s}{s}", .{
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{s}]<r> {s}{s}", .{
@tagName(kind),
bun.fmt.debugUtf32PathFormatter(pattern),
pattern,
entry_subpath,
if (entry.kind == .directory) "/" else "",
});
@@ -701,9 +703,9 @@ pub const PackCommand = struct {
if (isExcluded(entry, entry_subpath, dir_depth, &.{})) |used_pattern_info| {
if (comptime log_level.isVerbose()) {
const pattern, const kind = used_pattern_info;
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{}]<r> {s}{s}", .{
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{s}]<r> {s}{s}", .{
@tagName(kind),
bun.fmt.debugUtf32PathFormatter(pattern),
pattern,
entry_subpath,
if (entry.kind == .directory) "/" else "",
});
@@ -799,9 +801,9 @@ pub const PackCommand = struct {
if (isExcluded(entry, entry_subpath, dir_depth, ignores.items)) |used_pattern_info| {
if (comptime log_level.isVerbose()) {
const pattern, const kind = used_pattern_info;
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{}]<r> {s}{s}", .{
Output.prettyln("<r><blue>ignore<r> <d>[{s}:{s}]<r> {s}{s}", .{
@tagName(kind),
bun.fmt.debugUtf32PathFormatter(pattern),
pattern,
entry_subpath,
if (entry.kind == .directory) "/" else "",
});
@@ -958,7 +960,7 @@ pub const PackCommand = struct {
entry_subpath: stringZ,
dir_depth: usize,
ignores: []const IgnorePatterns,
) ?struct { []const u32, IgnorePatterns.Kind } {
) ?struct { []const u8, IgnorePatterns.Kind } {
const entry_name = entry.name.slice();
if (dir_depth == 1) {
@@ -982,7 +984,7 @@ pub const PackCommand = struct {
// check default ignores that only apply to the root project directory
for (root_default_ignore_patterns) |pattern| {
switch (glob.matchImpl(pattern, entry_name)) {
switch (glob.match(.latin1, pattern, .utf8, entry_name)) {
.match => {
// cannot be reversed
return .{
@@ -1000,7 +1002,7 @@ pub const PackCommand = struct {
}
}
var ignore_pattern: []const u32 = &.{};
var ignore_pattern: []const u8 = &.{};
var ignore_kind: IgnorePatterns.Kind = .@".npmignore";
// then check default ignore list. None of the defaults contain slashes
@@ -1009,7 +1011,7 @@ pub const PackCommand = struct {
for (default_ignore_patterns) |pattern_info| {
const pattern, const can_override = pattern_info;
switch (glob.matchImpl(pattern, entry_name)) {
switch (glob.match(.latin1, pattern, .utf8, entry_name)) {
.match => {
if (can_override) {
ignored = true;
@@ -1051,7 +1053,7 @@ pub const PackCommand = struct {
if (pattern.dirs_only and entry.kind != .directory) continue;
const match_path = if (pattern.rel_path) rel else entry_name;
switch (glob.matchImpl(pattern.glob, match_path)) {
switch (glob.match(.utf8, pattern.glob, .utf8, match_path)) {
.match => {
ignored = true;
ignore_pattern = pattern.glob;
@@ -2018,7 +2020,7 @@ pub const PackCommand = struct {
/// from .npmignore, .gitignore, or `files`
/// in package.json
const Pattern = struct {
glob: []const u32,
glob: []const u8,
/// beginning or middle slash (leading slash was trimmed)
rel_path: bool,
// can only match directories (had an ending slash, also trimmed)
@@ -2068,20 +2070,16 @@ pub const PackCommand = struct {
break :check_slashes .{ leading_or_middle_slash, trailing_slash, skipped_negate };
};
const length = bun.simdutf.length.utf32.from.utf8.le(remain) + @intFromBool(add_negate);
const buf = try allocator.alloc(u32, length);
const result = bun.simdutf.convert.utf8.to.utf32.with_errors.le(remain, buf[@intFromBool(add_negate)..]);
if (!result.isSuccessful()) {
allocator.free(buf);
return null;
}
const result = try allocator.alloc(u8, remain.len + @intFromBool(add_negate));
var glob_remain = result;
if (add_negate) {
buf[0] = '!';
glob_remain[0] = '!';
glob_remain = glob_remain[1..];
}
@memcpy(glob_remain, remain);
return .{
.glob = buf[0 .. result.count + @intFromBool(add_negate)],
.glob = result,
.rel_path = has_leading_or_middle_slash,
.@"leading **/" = @"has leading **/, (could start with '!')",
.dirs_only = has_trailing_slash,

View File

@@ -300,25 +300,6 @@ pub inline fn utf16(slice_: []const u16) FormatUTF16 {
return FormatUTF16{ .buf = slice_ };
}
/// Wraps a UTF-32 path so it can be printed as UTF-8 with `{}`.
/// Debug only: invalid UTF-32 is not reported as an error; a placeholder
/// message is written instead.
pub inline fn debugUtf32PathFormatter(path: []const u32) DebugUTF32PathFormatter {
    return DebugUTF32PathFormatter{ .path = path };
}

/// Formatter that converts `path` from UTF-32 to UTF-8 when it is printed.
pub const DebugUTF32PathFormatter = struct {
    path: []const u32,

    /// Writes `path` to `writer` as UTF-8, or "Invalid UTF32!" if the
    /// conversion fails. Only errors from `writer` are returned.
    pub fn format(this: @This(), comptime _: []const u8, _: anytype, writer: anytype) !void {
        var path_buf: bun.PathBuffer = undefined;

        // One code point encodes to at most 4 UTF-8 bytes. The converter is
        // given no output length, so feed it at most `path_buf.len / 4` code
        // points at a time to guarantee the output always fits in `path_buf`.
        const max_codepoints_per_chunk = path_buf.len / 4;

        var remaining = this.path;
        while (remaining.len > 0) {
            const chunk = remaining[0..@min(remaining.len, max_codepoints_per_chunk)];
            const result = bun.simdutf.convert.utf32.to.utf8.with_errors.le(chunk, &path_buf);
            if (!result.isSuccessful()) {
                try writer.writeAll("Invalid UTF32!");
                return;
            }
            try writer.writeAll(path_buf[0..result.count]);
            remaining = remaining[chunk.len..];
        }
    }
};
pub const FormatUTF16 = struct {
buf: []const u16,
path_fmt_opts: ?PathFormatOptions = null,

File diff suppressed because it is too large Load Diff