Fixes ENG-21287
Build times, measured with `bun run build && echo '//' >> src/main.zig && time bun run build` (a rebuild after touching `src/main.zig`):
| Platform | 0.14.1 | 0.15.2 | Speedup |
|---|---|---|---|
| macos debug asan | 126.90s | 106.27s | 1.19x |
| macos debug noasan | 60.62s | 50.85s | 1.19x |
| linux debug asan | 292.77s | 241.45s | 1.21x |
| linux debug noasan | 146.58s | 130.94s | 1.12x |
| linux debug use_llvm=false | n/a | 78.27s | 1.87x |
| windows debug asan | 177.13s | 142.55s | 1.24x |
Runtime performance:
- `next build` memory usage may have gone up by about 5%; otherwise runtime performance seems the same. Some code that uses writers may have gotten slower, especially one instance of a counting writer and a few instances of unbuffered writers that now have vtable overhead (see the sketch after this list).
- File size reduced by 800 KB (from 100.2 MB to 99.4 MB)
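
A minimal sketch (not code from this PR) of the writer overhead mentioned above: with the new non-generic `*std.Io.Writer` interface, each write call dispatches through a vtable, so paths that used to issue many tiny writes through a comptime-known writer now pay a virtual call each time. Batching output into a single formatted call (or a local buffer) amortizes that cost. The helper below is hypothetical.

```zig
const std = @import("std");

// Hypothetical helper: one `print` call goes through the writer's vtable once,
// instead of once per small piece of output.
fn writeHeader(w: *std.Io.Writer, name: []const u8, value: []const u8) !void {
    try w.print("{s}: {s}\r\n", .{ name, value });
}
```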
Improvements:
- the `@export` hack is no longer needed for watch mode
- the native x86_64 backend builds Linux debug binaries faster. To use it, set `use_llvm` to false and `no_link_obj` to false. Also set `ASAN_OPTIONS=detect_leaks=0`, otherwise leak detection will spam the output with tens of thousands of lines of debug-info errors. You may need the Zig fork of LLDB for debugging.
- `zig test-obj`, which we will be able to use for Zig unit tests
Still an issue:
- false 'dependency loop' errors remain in watch mode
- watch mode crashes have been observed
Follow-up:
- [ ] search `comptime Writer: type` and `comptime W: type` and remove
- [ ] remove format_mode in our zig fork
- [ ] remove deprecated.zig autoFormatLabelFallback
- [ ] remove deprecated.zig autoFormatLabel
- [ ] remove deprecated.BufferedWriter and BufferedReader
- [ ] remove override_no_export_cpp_apis as it is no longer needed
- [ ] css `Parser(W)` -> `Parser`, and remove all the `comptime writer: type` params (see the sketch after this list)
- [ ] remove deprecated writer fully
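
A minimal sketch of the refactor direction several of these items describe (the names are illustrative, not the actual css parser code): drop the comptime writer type parameter and hold the runtime `*std.Io.Writer` interface instead, so there is one `Parser` type rather than one instantiation per writer.

```zig
const std = @import("std");

// Before: a new Parser type is instantiated for every comptime writer type W.
fn Parser(comptime W: type) type {
    return struct {
        writer: W,
    };
}

// After: a single Parser type that writes through the runtime interface,
// which is what removing the `comptime writer: type` params amounts to.
const ParserWithoutComptimeWriter = struct {
    writer: *std.Io.Writer,
};
```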
Files that add lines:
```
649 src/deprecated.zig
167 scripts/pack-codegen-for-zig-team.ts
54 scripts/cleartrace-impl.js
46 scripts/cleartrace.ts
43 src/windows.zig
18 src/fs.zig
17 src/bun.js/ConsoleObject.zig
16 src/output.zig
12 src/bun.js/test/debug.zig
12 src/bun.js/node/node_fs.zig
8 src/env_loader.zig
7 src/css/printer.zig
7 src/cli/init_command.zig
7 src/bun.js/node.zig
6 src/string/escapeRegExp.zig
6 src/install/PnpmMatcher.zig
5 src/bun.js/webcore/Blob.zig
4 src/crash_handler.zig
4 src/bun.zig
3 src/install/lockfile/bun.lock.zig
3 src/cli/update_interactive_command.zig
3 src/cli/pack_command.zig
3 build.zig
2 src/Progress.zig
2 src/install/lockfile/lockfile_json_stringify_for_debugging.zig
2 src/css/small_list.zig
2 src/bun.js/webcore/prompt.zig
1 test/internal/ban-words.test.ts
1 test/internal/ban-limits.json
1 src/watcher/WatcherTrace.zig
1 src/transpiler.zig
1 src/shell/builtin/cp.zig
1 src/js_printer.zig
1 src/io/PipeReader.zig
1 src/install/bin.zig
1 src/css/selectors/selector.zig
1 src/cli/run_command.zig
1 src/bun.js/RuntimeTranspilerStore.zig
1 src/bun.js/bindings/JSRef.zig
1 src/bake/DevServer.zig
```
Files that remove lines:
```
-1 src/test/recover.zig
-1 src/sql/postgres/SocketMonitor.zig
-1 src/sql/mysql/MySQLRequestQueue.zig
-1 src/sourcemap/CodeCoverage.zig
-1 src/css/values/color_js.zig
-1 src/compile_target.zig
-1 src/bundler/linker_context/convertStmtsForChunk.zig
-1 src/bundler/bundle_v2.zig
-1 src/bun.js/webcore/blob/read_file.zig
-1 src/ast/base.zig
-2 src/sql/postgres/protocol/ArrayList.zig
-2 src/shell/builtin/mkdir.zig
-2 src/install/PackageManager/patchPackage.zig
-2 src/install/PackageManager/PackageManagerDirectories.zig
-2 src/fmt.zig
-2 src/css/declaration.zig
-2 src/css/css_parser.zig
-2 src/collections/baby_list.zig
-2 src/bun.js/bindings/ZigStackFrame.zig
-2 src/ast/E.zig
-3 src/StandaloneModuleGraph.zig
-3 src/deps/picohttp.zig
-3 src/deps/libuv.zig
-3 src/btjs.zig
-4 src/threading/Futex.zig
-4 src/shell/builtin/touch.zig
-4 src/meta.zig
-4 src/install/lockfile.zig
-4 src/css/selectors/parser.zig
-5 src/shell/interpreter.zig
-5 src/css/error.zig
-5 src/bun.js/web_worker.zig
-5 src/bun.js.zig
-6 src/cli/test_command.zig
-6 src/bun.js/VirtualMachine.zig
-6 src/bun.js/uuid.zig
-6 src/bun.js/bindings/JSValue.zig
-9 src/bun.js/test/pretty_format.zig
-9 src/bun.js/api/BunObject.zig
-14 src/install/install_binding.zig
-14 src/fd.zig
-14 src/bun.js/node/path.zig
-14 scripts/pack-codegen-for-zig-team.sh
-17 src/bun.js/test/diff_format.zig
```
The file lists above were generated with `git diff --numstat origin/main...HEAD | awk '{ print ($1-$2)"\t"$3 }' | sort -rn`
---------
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
Co-authored-by: Meghan Denny <meghan@bun.com>
Co-authored-by: taylor.fish <contact@taylor.fish>
4644 lines
168 KiB
Zig
pub const interpret = @import("./interpreter.zig");
pub const subproc = @import("./subproc.zig");

pub const AllocScope = @import("./AllocScope.zig");

pub const EnvMap = interpret.EnvMap;
pub const EnvStr = interpret.EnvStr;
pub const Interpreter = interpret.Interpreter;
pub const ParsedShellScript = interpret.ParsedShellScript;
pub const Subprocess = subproc.ShellSubprocess;
pub const ExitCode = interpret.ExitCode;
pub const IOWriter = Interpreter.IOWriter;
pub const IOReader = Interpreter.IOReader;
// pub const IOWriter = interpret.IOWriter;
// pub const SubprocessMini = subproc.ShellSubprocessMini;

pub const Yield = @import("./Yield.zig").Yield;
pub const unreachableState = interpret.unreachableState;

const GlobWalker = bun.glob.GlobWalker(null, true);
// const GlobWalker = Glob.BunGlobWalker;

pub const SUBSHELL_TODO_ERROR = "Subshells are not implemented, please open GitHub issue!";

/// Using these instead of the file descriptor decl literals to make sure we use libuv fds on Windows
pub const STDIN_FD: bun.FileDescriptor = .fromUV(0);
pub const STDOUT_FD: bun.FileDescriptor = .fromUV(1);
pub const STDERR_FD: bun.FileDescriptor = .fromUV(2);

pub const POSIX_DEV_NULL: [:0]const u8 = "/dev/null";
pub const WINDOWS_DEV_NULL: [:0]const u8 = "NUL";

/// The strings in this type are allocated with event loop ctx allocator
pub const ShellErr = union(enum) {
    sys: jsc.SystemError,
    custom: []const u8,
    invalid_arguments: struct { val: []const u8 = "" },
    todo: []const u8,

    pub fn newSys(e: anytype) @This() {
        return .{
            .sys = switch (@TypeOf(e)) {
                Syscall.Error => e.toShellSystemError(),
                jsc.SystemError => e,
                else => @compileError("Invalid `e`: " ++ @typeName(e)),
            },
        };
    }

    pub fn format(this: *const ShellErr, writer: *std.Io.Writer) !void {
        return switch (this.*) {
            .sys => |e| writer.print("bun: {f}: {f}", .{ e.message, e.path }),
            .custom => |msg| writer.print("bun: {s}", .{msg}),
            .invalid_arguments => |args| writer.print("bun: invalid arguments: {s}", .{args.val}),
            .todo => |msg| writer.print("bun: TODO: {s}", .{msg}),
        };
    }

    pub fn throwJS(this: *const @This(), globalThis: *jsc.JSGlobalObject) bun.JSError {
        defer {
            // basically `transferToJS`. don't want to double deref the sys error
            switch (this.*) {
                .sys => {
                    // sys.toErrorInstance handles decrementing the ref count
                },
                .custom, .invalid_arguments, .todo => {
                    this.deinit(bun.default_allocator);
                },
            }
        }
        switch (this.*) {
            .sys => {
                const err = this.sys.toErrorInstance(globalThis);
                return globalThis.throwValue(err);
            },
            .custom => {
                const err_value = bun.String.cloneUTF8(this.custom).toErrorInstance(globalThis);
                return globalThis.throwValue(err_value);
                // this.bunVM().allocator.free(jsc.ZigString.untagged(str._unsafe_ptr_do_not_use)[0..str.len]);
            },
            .invalid_arguments => {
                return globalThis.throwInvalidArguments("{s}", .{this.invalid_arguments.val});
            },
            .todo => {
                return globalThis.throwTODO(this.todo);
            },
        }
    }

    pub fn throwMini(this: @This()) noreturn {
        defer this.deinit(bun.default_allocator);
        switch (this) {
            .sys => |err| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed due to error: <b>bunsh: {f}: {f}<r>", .{ err.message, err.path });
            },
            .custom => |custom| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed due to error: <b>{s}<r>", .{custom});
            },
            .invalid_arguments => |invalid_arguments| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed due to error: <b>bunsh: invalid arguments: {s}<r>", .{invalid_arguments.val});
            },
            .todo => |todo| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed due to error: <b>TODO: {s}<r>", .{todo});
            },
        }
        bun.Global.exit(1);
    }

    pub fn deinit(this: *const @This(), allocator: Allocator) void {
        switch (this.*) {
            .sys => {
                this.sys.deref();
            },
            .custom => allocator.free(this.custom),
            .invalid_arguments => {},
            .todo => allocator.free(this.todo),
        }
    }
};

pub fn Result(comptime T: anytype) type {
    return union(enum) {
        result: T,
        err: ShellErr,

        pub const success: @This() = @This(){
            .result = std.mem.zeroes(T),
        };

        pub fn asErr(this: @This()) ?ShellErr {
            if (this == .err) return this.err;
            return null;
        }
    };
}

pub const ShellError = error{ Init, Process, GlobalThisThrown, Spawn };
pub const ParseError = error{
    Unsupported,
    Expected,
    Unexpected,
    Unknown,
    Lex,
};

extern "c" fn setenv(name: [*:0]const u8, value: [*:0]const u8, overwrite: i32) i32;

fn setEnv(name: [*:0]const u8, value: [*:0]const u8) void {
    // TODO: windows
    _ = setenv(name, value, 1);
}

/// [0] => read end
/// [1] => write end
pub const Pipe = [2]bun.FileDescriptor;

const log = bun.Output.scoped(.SHELL, .hidden);

pub const GlobalJS = struct {
    globalThis: *jsc.JSGlobalObject,

    pub inline fn init(g: *jsc.JSGlobalObject) GlobalJS {
        return .{
            .globalThis = g,
        };
    }

    pub inline fn allocator(this: @This()) Allocator {
        return this.globalThis.bunVM().allocator;
    }

    pub inline fn eventLoopCtx(this: @This()) *jsc.VirtualMachine {
        return this.globalThis.bunVM();
    }

    pub inline fn throwInvalidArguments(this: @This(), comptime fmt: []const u8, args: anytype) ShellErr {
        return .{
            .invalid_arguments = .{ .val = bun.handleOom(std.fmt.allocPrint(this.globalThis.bunVM().allocator, fmt, args)) },
        };
    }

    pub inline fn throwTODO(this: @This(), msg: []const u8) ShellErr {
        return .{
            .todo = bun.handleOom(std.fmt.allocPrint(this.globalThis.bunVM().allocator, "{s}", .{msg})),
        };
    }

    pub inline fn throwError(this: @This(), err: bun.sys.Error) void {
        this.globalThis.throwValue(err.toJS(this.globalThis));
    }

    pub inline fn handleError(this: @This(), err: anytype, comptime fmt: []const u8) ShellErr {
        const str = bun.handleOom(std.fmt.allocPrint(this.globalThis.bunVM().allocator, "{s} " ++ fmt, .{@errorName(err)}));
        return .{
            .custom = str,
        };
    }

    pub inline fn throw(this: @This(), comptime fmt: []const u8, args: anytype) ShellErr {
        const str = bun.handleOom(std.fmt.allocPrint(this.globalThis.bunVM().allocator, fmt, args));
        return .{
            .custom = str,
        };
    }

    pub inline fn createNullDelimitedEnvMap(this: @This(), alloc: Allocator) ![:null]?[*:0]const u8 {
        return this.globalThis.bunVM().transpiler.env.map.createNullDelimitedEnvMap(alloc);
    }

    pub inline fn getAllocator(this: @This()) Allocator {
        return this.globalThis.bunVM().allocator;
    }

    pub inline fn enqueueTaskConcurrentWaitPid(this: @This(), task: anytype) void {
        this.globalThis.bunVMConcurrently().enqueueTaskConcurrent(jsc.ConcurrentTask.create(jsc.Task.init(task)));
    }

    pub inline fn topLevelDir(this: @This()) []const u8 {
        return this.globalThis.bunVM().transpiler.fs.top_level_dir;
    }

    pub inline fn env(this: @This()) *bun.DotEnv.Loader {
        return this.globalThis.bunVM().transpiler.env;
    }

    pub inline fn platformEventLoop(this: @This()) *jsc.PlatformEventLoop {
        const loop = jsc.AbstractVM(this.eventLoopCtx());
        return loop.platformEventLoop();
    }

    pub inline fn actuallyThrow(this: @This(), shellerr: ShellErr) void {
        shellerr.throwJS(this.globalThis);
    }
};

pub const GlobalMini = struct {
    mini: *jsc.MiniEventLoop,

    pub inline fn init(g: *jsc.MiniEventLoop) @This() {
        return .{
            .mini = g,
        };
    }

    pub inline fn env(this: @This()) *bun.DotEnv.Loader {
        return this.mini.env.?;
    }

    pub inline fn allocator(this: @This()) Allocator {
        return this.mini.allocator;
    }

    pub inline fn eventLoopCtx(this: @This()) *jsc.MiniEventLoop {
        return this.mini;
    }

    // pub inline fn throwShellErr(this: @This(), shell_err: ShellErr

    pub inline fn throwTODO(this: @This(), msg: []const u8) ShellErr {
        return .{
            .todo = bun.handleOom(std.fmt.allocPrint(this.mini.allocator, "{s}", .{msg})),
        };
    }

    pub inline fn throwInvalidArguments(this: @This(), comptime fmt: []const u8, args: anytype) ShellErr {
        return .{
            .invalid_arguments = .{ .val = bun.handleOom(std.fmt.allocPrint(this.allocator(), fmt, args)) },
        };
    }

    pub inline fn handleError(this: @This(), err: anytype, comptime fmt: []const u8) ShellErr {
        const str = bun.handleOom(std.fmt.allocPrint(this.mini.allocator, "{s} " ++ fmt, .{@errorName(err)}));
        return .{
            .custom = str,
        };
    }

    pub inline fn createNullDelimitedEnvMap(this: @This(), alloc: Allocator) ![:null]?[*:0]const u8 {
        return this.mini.env.?.map.createNullDelimitedEnvMap(alloc);
    }

    pub inline fn getAllocator(this: @This()) Allocator {
        return this.mini.allocator;
    }

    pub inline fn enqueueTaskConcurrentWaitPid(this: @This(), task: anytype) void {
        var anytask = bun.handleOom(bun.default_allocator.create(jsc.AnyTaskWithExtraContext));
        _ = anytask.from(task, "runFromMainThreadMini");
        this.mini.enqueueTaskConcurrent(anytask);
    }

    pub inline fn topLevelDir(this: @This()) []const u8 {
        return this.mini.top_level_dir;
    }

    pub inline fn throw(this: @This(), comptime fmt: []const u8, args: anytype) ShellErr {
        const str = bun.handleOom(std.fmt.allocPrint(this.allocator(), fmt, args));
        return .{
            .custom = str,
        };
    }

    pub inline fn actuallyThrow(_: @This(), shellerr: ShellErr) void {
        shellerr.throwMini();
    }

    pub inline fn platformEventLoop(this: @This()) *jsc.PlatformEventLoop {
        const loop = jsc.AbstractVM(this.eventLoopCtx());
        return loop.platformEventLoop();
    }
};

// const GlobalHandle = if (jsc.EventLoopKind == .js) GlobalJS else GlobalMini;

pub const AST = struct {
|
||
pub const Script = struct {
|
||
stmts: []Stmt,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = 0;
|
||
for (this.stmts) |*stmt| {
|
||
cost += stmt.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const Stmt = struct {
|
||
exprs: []Expr,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = 0;
|
||
for (this.exprs) |*expr| {
|
||
cost += expr.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const Expr = union(Expr.Tag) {
|
||
assign: []Assign,
|
||
binary: *Binary,
|
||
pipeline: *Pipeline,
|
||
cmd: *Cmd,
|
||
subshell: *Subshell,
|
||
@"if": *If,
|
||
condexpr: *CondExpr,
|
||
/// Valid async (`&`) expressions:
|
||
/// - pipeline
|
||
/// - cmd
|
||
/// - subshell
|
||
/// - if
|
||
/// - condexpr
|
||
/// Note that commands in a pipeline cannot be async
|
||
/// TODO: Extra indirection for essentially a boolean feels bad for performance
|
||
/// could probably find a more efficient way to encode this information.
|
||
async: *Expr,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
return switch (this.*) {
|
||
.assign => |assign| brk: {
|
||
var cost: usize = 0;
|
||
for (assign) |*expr| {
|
||
cost += expr.memoryCost();
|
||
}
|
||
break :brk cost;
|
||
},
|
||
.binary => |binary| binary.memoryCost(),
|
||
.pipeline => |pipeline| pipeline.memoryCost(),
|
||
.cmd => |cmd| cmd.memoryCost(),
|
||
.subshell => |subshell| subshell.memoryCost(),
|
||
.@"if" => |@"if"| @"if".memoryCost(),
|
||
.condexpr => |condexpr| condexpr.memoryCost(),
|
||
.async => |async| async.memoryCost(),
|
||
};
|
||
}
|
||
|
||
pub fn asPipelineItem(this: *Expr) ?PipelineItem {
|
||
return switch (this.*) {
|
||
.assign => .{ .assigns = this.assign },
|
||
.cmd => .{ .cmd = this.cmd },
|
||
.subshell => .{ .subshell = this.subshell },
|
||
.@"if" => .{ .@"if" = this.@"if" },
|
||
.condexpr => .{ .condexpr = this.condexpr },
|
||
else => null,
|
||
};
|
||
}
|
||
|
||
pub const Tag = enum {
|
||
assign,
|
||
binary,
|
||
pipeline,
|
||
cmd,
|
||
subshell,
|
||
@"if",
|
||
condexpr,
|
||
async,
|
||
};
|
||
};
|
||
|
||
/// https://www.gnu.org/software/bash/manual/bash.html#Bash-Conditional-Expressions
|
||
pub const CondExpr = struct {
|
||
op: Op,
|
||
args: ArgList = ArgList.zeroes,
|
||
|
||
const ArgList = SmolList(Atom, 2);
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(Op);
|
||
cost += this.args.memoryCost();
|
||
return cost;
|
||
}
|
||
|
||
// args: SmolList(1, comptime INLINED_MAX: comptime_int)
|
||
pub const Op = enum {
|
||
/// -a file
|
||
/// True if file exists.
|
||
@"-a",
|
||
|
||
/// -b file
|
||
/// True if file exists and is a block special file.
|
||
@"-b",
|
||
|
||
/// -c file
|
||
/// True if file exists and is a character special file.
|
||
@"-c",
|
||
|
||
/// -d file
|
||
/// True if file exists and is a directory.
|
||
@"-d",
|
||
|
||
/// -e file
|
||
/// True if file exists.
|
||
@"-e",
|
||
|
||
/// -f file
|
||
/// True if file exists and is a regular file.
|
||
@"-f",
|
||
|
||
/// -g file
|
||
/// True if file exists and its set-group-id bit is set.
|
||
@"-g",
|
||
|
||
/// -h file
|
||
/// True if file exists and is a symbolic link.
|
||
@"-h",
|
||
|
||
/// -k file
|
||
/// True if file exists and its "sticky" bit is set.
|
||
@"-k",
|
||
|
||
/// -p file
|
||
/// True if file exists and is a named pipe (FIFO).
|
||
@"-p",
|
||
|
||
/// -r file
|
||
/// True if file exists and is readable.
|
||
@"-r",
|
||
|
||
/// -s file
|
||
/// True if file exists and has a size greater than zero.
|
||
@"-s",
|
||
|
||
/// -t fd
|
||
/// True if file descriptor fd is open and refers to a terminal.
|
||
@"-t",
|
||
|
||
/// -u file
|
||
/// True if file exists and its set-user-id bit is set.
|
||
@"-u",
|
||
|
||
/// -w file
|
||
/// True if file exists and is writable.
|
||
@"-w",
|
||
|
||
/// -x file
|
||
/// True if file exists and is executable.
|
||
@"-x",
|
||
|
||
/// -G file
|
||
/// True if file exists and is owned by the effective group id.
|
||
@"-G",
|
||
|
||
/// -L file
|
||
/// True if file exists and is a symbolic link.
|
||
@"-L",
|
||
|
||
/// -N file
|
||
/// True if file exists and has been modified since it was last read.
|
||
@"-N",
|
||
|
||
/// -O file
|
||
/// True if file exists and is owned by the effective user id.
|
||
@"-O",
|
||
|
||
/// -S file
|
||
/// True if file exists and is a socket.
|
||
@"-S",
|
||
|
||
/// file1 -ef file2
|
||
/// True if file1 and file2 refer to the same device and inode numbers.
|
||
@"-ef",
|
||
|
||
/// file1 -nt file2
|
||
/// True if file1 is newer than file2, or if file1 exists and file2 does not.
|
||
@"-nt",
|
||
|
||
/// file1 -ot file2
|
||
/// True if file1 is older than file2, or if file2 exists and file1 does not.
|
||
@"-ot",
|
||
|
||
/// -o optname
|
||
/// True if the shell option optname is enabled.
|
||
@"-o",
|
||
|
||
/// -v varname
|
||
/// True if the shell variable varname is set.
|
||
@"-v",
|
||
|
||
/// -R varname
|
||
/// True if the shell variable varname is set and is a name reference.
|
||
@"-R",
|
||
|
||
/// -z string
|
||
/// True if the length of string is zero.
|
||
@"-z",
|
||
|
||
/// -n string
|
||
/// True if the length of string is non-zero.
|
||
@"-n",
|
||
|
||
/// string1 == string2
|
||
/// True if the strings are equal.
|
||
@"==",
|
||
|
||
/// string1 != string2
|
||
/// True if the strings are not equal.
|
||
@"!=",
|
||
|
||
/// string1 < string2
|
||
/// True if string1 sorts before string2 lexicographically.
|
||
@"<",
|
||
|
||
/// string1 > string2
|
||
/// True if string1 sorts after string2 lexicographically.
|
||
@">",
|
||
|
||
/// arg1 OP arg2
|
||
/// OP is one of ‘-eq’, ‘-ne’, ‘-lt’, ‘-le’, ‘-gt’, or ‘-ge’.
|
||
/// These arithmetic binary operators return true if arg1 is equal to, not equal to, less than,
|
||
/// less than or equal to, greater than, or greater than or equal to arg2, respectively.
|
||
@"-eq",
|
||
@"-ne",
|
||
@"-lt",
|
||
@"-le",
|
||
@"-gt",
|
||
@"-ge",
|
||
|
||
pub const SUPPORTED: []const Op = &.{
|
||
.@"-f",
|
||
.@"-z",
|
||
.@"-n",
|
||
.@"-d",
|
||
.@"-c",
|
||
.@"==",
|
||
.@"!=",
|
||
};
|
||
|
||
pub fn isSupported(op: Op) bool {
|
||
inline for (SUPPORTED) |supported_op| {
|
||
if (supported_op == op) return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
const SINGLE_ARG_OPS: []const std.builtin.Type.EnumField = brk: {
|
||
const fields: []const std.builtin.Type.EnumField = std.meta.fields(AST.CondExpr.Op);
|
||
const count = count: {
|
||
var count: usize = 0;
|
||
for (fields) |f| {
|
||
if (f.name[0] == '-' and f.name.len == 2) {
|
||
count += 1;
|
||
}
|
||
}
|
||
break :count count;
|
||
};
|
||
var ret: [count]std.builtin.Type.EnumField = undefined;
|
||
var len: usize = 0;
|
||
for (fields) |f| {
|
||
if (f.name[0] == '-' and f.name.len == 2) {
|
||
ret[len] = f;
|
||
len += 1;
|
||
}
|
||
}
|
||
const final = ret[0..].*;
|
||
break :brk &final;
|
||
};
|
||
|
||
const BINARY_OPS: []const std.builtin.Type.EnumField = brk: {
|
||
const fields: []const std.builtin.Type.EnumField = std.meta.fields(AST.CondExpr.Op);
|
||
const count = count: {
|
||
var count: usize = 0;
|
||
for (fields) |f| {
|
||
if (!(f.name[0] == '-' and f.name.len == 2)) {
|
||
count += 1;
|
||
}
|
||
}
|
||
break :count count;
|
||
};
|
||
var ret: [count]std.builtin.Type.EnumField = undefined;
|
||
var len: usize = 0;
|
||
for (fields) |f| {
|
||
if (!(f.name[0] == '-' and f.name.len == 2)) {
|
||
ret[len] = f;
|
||
len += 1;
|
||
}
|
||
}
|
||
const final = ret[0..].*;
|
||
break :brk &final;
|
||
};
|
||
};
|
||
|
||
pub fn to_expr(this: CondExpr, alloc: Allocator) !Expr {
|
||
const condexpr = try alloc.create(CondExpr);
|
||
condexpr.* = this;
|
||
return .{
|
||
.condexpr = condexpr,
|
||
};
|
||
}
|
||
};
|
||
|
||
pub const Subshell = struct {
|
||
script: Script,
|
||
redirect: ?Redirect = null,
|
||
redirect_flags: RedirectFlags = .{},
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(Subshell);
|
||
cost += this.script.memoryCost();
|
||
if (this.redirect) |*redirect| {
|
||
cost += redirect.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
/// TODO: If we know cond/then/elif/else is just a single command we don't need to store the stmt
|
||
pub const If = struct {
|
||
cond: SmolList(Stmt, 1) = SmolList(Stmt, 1).zeroes,
|
||
then: SmolList(Stmt, 1) = SmolList(Stmt, 1).zeroes,
|
||
/// From the spec:
|
||
///
|
||
/// else_part : Elif compound_list Then else_part
|
||
/// | Else compound_list
|
||
///
|
||
/// If len is:
|
||
/// - 0 => no else
|
||
/// - 1 => just else
|
||
/// - 2n (n is # of elif/then branches) => n elif/then branches
|
||
/// - 2n + 1 => n elif/then branches and an else branch
|
||
else_parts: SmolList(SmolList(Stmt, 1), 1) = SmolList(SmolList(Stmt, 1), 1).zeroes,
|
||
|
||
pub fn to_expr(this: If, alloc: Allocator) !Expr {
|
||
const @"if" = try alloc.create(If);
|
||
@"if".* = this;
|
||
return .{
|
||
.@"if" = @"if",
|
||
};
|
||
}
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(If);
|
||
cost += this.cond.memoryCost();
|
||
cost += this.then.memoryCost();
|
||
cost += this.else_parts.memoryCost();
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const Binary = struct {
|
||
op: Op,
|
||
left: Expr,
|
||
right: Expr,
|
||
|
||
const Op = enum { And, Or };
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(Binary);
|
||
cost += this.left.memoryCost();
|
||
cost += this.right.memoryCost();
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const Pipeline = struct {
|
||
items: []PipelineItem,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = 0;
|
||
for (this.items) |*item| {
|
||
cost += item.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const PipelineItem = union(enum) {
|
||
cmd: *Cmd,
|
||
assigns: []Assign,
|
||
subshell: *Subshell,
|
||
@"if": *If,
|
||
condexpr: *CondExpr,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = 0;
|
||
switch (this.*) {
|
||
.cmd => |cmd| {
|
||
cost += cmd.memoryCost();
|
||
},
|
||
.assigns => |assigns| {
|
||
for (assigns) |*assign| {
|
||
cost += assign.memoryCost();
|
||
}
|
||
},
|
||
.subshell => |subshell| {
|
||
cost += subshell.memoryCost();
|
||
},
|
||
.@"if" => |@"if"| {
|
||
cost += @"if".memoryCost();
|
||
},
|
||
.condexpr => |condexpr| {
|
||
cost += condexpr.memoryCost();
|
||
},
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const CmdOrAssigns = union(CmdOrAssigns.Tag) {
|
||
cmd: Cmd,
|
||
assigns: []Assign,
|
||
|
||
pub const Tag = enum { cmd, assigns };
|
||
|
||
pub fn to_pipeline_item(this: CmdOrAssigns, alloc: Allocator) PipelineItem {
|
||
switch (this) {
|
||
.cmd => |cmd| {
|
||
const cmd_ptr = try alloc.create(Cmd);
|
||
cmd_ptr.* = cmd;
|
||
return .{ .cmd = cmd_ptr };
|
||
},
|
||
.assigns => |assigns| {
|
||
return .{ .assign = assigns };
|
||
},
|
||
}
|
||
}
|
||
|
||
pub fn to_expr(this: CmdOrAssigns, alloc: Allocator) !Expr {
|
||
switch (this) {
|
||
.cmd => |cmd| {
|
||
const cmd_ptr = try alloc.create(Cmd);
|
||
cmd_ptr.* = cmd;
|
||
return .{ .cmd = cmd_ptr };
|
||
},
|
||
.assigns => |assigns| {
|
||
return .{ .assign = assigns };
|
||
},
|
||
}
|
||
}
|
||
};
|
||
|
||
/// A "buffer" from a JS object can be piped from and to, and also have
|
||
/// output from commands redirected into it. Only BunFile, ArrayBufferView
|
||
/// are supported.
|
||
pub const JSBuf = struct {
|
||
idx: u32,
|
||
|
||
pub fn new(idx: u32) JSBuf {
|
||
return .{ .idx = idx };
|
||
}
|
||
};
|
||
|
||
/// A Subprocess from JS
|
||
pub const JSProc = struct { idx: JSValue };
|
||
|
||
pub const Assign = struct {
|
||
label: []const u8,
|
||
value: Atom,
|
||
|
||
pub fn new(label: []const u8, value: Atom) Assign {
|
||
return .{
|
||
.label = label,
|
||
.value = value,
|
||
};
|
||
}
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(Assign);
|
||
cost += this.label.len;
|
||
cost += this.value.memoryCost();
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
pub const Cmd = struct {
|
||
assigns: []Assign,
|
||
name_and_args: []Atom,
|
||
redirect: RedirectFlags = .{},
|
||
redirect_file: ?Redirect = null,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(Cmd);
|
||
for (this.assigns) |*assign| {
|
||
cost += assign.memoryCost();
|
||
}
|
||
for (this.name_and_args) |*atom| {
|
||
cost += atom.memoryCost();
|
||
}
|
||
|
||
if (this.redirect_file) |*redirect_file| {
|
||
cost += redirect_file.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
|
||
/// Bit flags for redirects:
|
||
/// - `>` = Redirect.Stdout
|
||
/// - `1>` = Redirect.Stdout
|
||
/// - `2>` = Redirect.Stderr
|
||
/// - `&>` = Redirect.Stdout | Redirect.Stderr
|
||
/// - `>>` = Redirect.Append | Redirect.Stdout
|
||
/// - `1>>` = Redirect.Append | Redirect.Stdout
|
||
/// - `2>>` = Redirect.Append | Redirect.Stderr
|
||
/// - `&>>` = Redirect.Append | Redirect.Stdout | Redirect.Stderr
|
||
///
|
||
/// Multiple redirects and redirecting stdin is not supported yet.
|
||
pub const RedirectFlags = packed struct(u8) {
|
||
stdin: bool = false,
|
||
stdout: bool = false,
|
||
stderr: bool = false,
|
||
append: bool = false,
|
||
/// 1>&2 === stdout=true and duplicate_out=true
|
||
/// 2>&1 === stderr=true and duplicate_out=true
|
||
duplicate_out: bool = false,
|
||
__unused: u3 = 0,
|
||
|
||
pub inline fn isEmpty(this: RedirectFlags) bool {
|
||
return @as(u8, @bitCast(this)) == 0;
|
||
}
|
||
|
||
pub fn redirectsElsewhere(this: RedirectFlags, io_kind: enum { stdin, stdout, stderr }) bool {
|
||
return switch (io_kind) {
|
||
.stdin => this.stdin,
|
||
.stdout => if (this.duplicate_out) !this.stdout else this.stdout,
|
||
.stderr => if (this.duplicate_out) !this.stderr else this.stderr,
|
||
};
|
||
}
|
||
|
||
pub fn @"2>&1"() RedirectFlags {
|
||
return .{ .stderr = true, .duplicate = true };
|
||
}
|
||
|
||
pub fn @"1>&2"() RedirectFlags {
|
||
return .{ .stdout = true, .duplicate = true };
|
||
}
|
||
|
||
pub fn toFlags(this: RedirectFlags) i32 {
|
||
const read_write_flags: i32 = if (this.stdin) bun.O.RDONLY else bun.O.WRONLY | bun.O.CREAT;
|
||
const extra: i32 = if (this.append) bun.O.APPEND else bun.O.TRUNC;
|
||
const final_flags: i32 = if (this.stdin) read_write_flags else extra | read_write_flags;
|
||
return final_flags;
|
||
}
|
||
|
||
pub fn @"<"() RedirectFlags {
|
||
return .{ .stdin = true };
|
||
}
|
||
|
||
pub fn @"<<"() RedirectFlags {
|
||
return .{ .stdin = true, .append = true };
|
||
}
|
||
|
||
pub fn @">"() RedirectFlags {
|
||
return .{ .stdout = true };
|
||
}
|
||
|
||
pub fn @">>"() RedirectFlags {
|
||
return .{ .append = true, .stdout = true };
|
||
}
|
||
|
||
pub fn @"&>"() RedirectFlags {
|
||
return .{ .stdout = true, .stderr = true };
|
||
}
|
||
|
||
pub fn @"&>>"() RedirectFlags {
|
||
return .{ .append = true, .stdout = true, .stderr = true };
|
||
}
|
||
|
||
pub fn merge(a: RedirectFlags, b: RedirectFlags) RedirectFlags {
|
||
const anum: u8 = @bitCast(a);
|
||
const bnum: u8 = @bitCast(b);
|
||
return @bitCast(anum | bnum);
|
||
}
|
||
};
|
||
|
||
pub const Redirect = union(enum) {
|
||
atom: Atom,
|
||
jsbuf: JSBuf,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
return switch (this.*) {
|
||
.atom => |*atom| atom.memoryCost(),
|
||
.jsbuf => @sizeOf(JSBuf),
|
||
};
|
||
}
|
||
};
|
||
|
||
pub const Atom = union(Atom.Tag) {
|
||
simple: SimpleAtom,
|
||
compound: CompoundAtom,
|
||
|
||
pub const Tag = enum(u8) { simple, compound };
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
return switch (this.*) {
|
||
.simple => |*simple| simple.memoryCost(),
|
||
.compound => |*compound| compound.memoryCost(),
|
||
};
|
||
}
|
||
|
||
pub fn merge(this: Atom, right: Atom, allocator: Allocator) !Atom {
|
||
if (this == .simple and right == .simple) {
|
||
var atoms = try allocator.alloc(SimpleAtom, 2);
|
||
atoms[0] = this.simple;
|
||
atoms[1] = right.simple;
|
||
return .{ .compound = .{
|
||
.atoms = atoms,
|
||
.brace_expansion_hint = this.simple == .brace_begin or this.simple == .brace_end or right.simple == .brace_begin or right.simple == .brace_end,
|
||
.glob_hint = this.simple == .asterisk or this.simple == .double_asterisk or right.simple == .asterisk or right.simple == .double_asterisk,
|
||
} };
|
||
}
|
||
|
||
if (this == .compound and right == .compound) {
|
||
var atoms = try allocator.alloc(SimpleAtom, this.compound.atoms.len + right.compound.atoms.len);
|
||
@memcpy(atoms[0..this.compound.atoms.len], this.compound.atoms);
|
||
@memcpy(atoms[this.compound.atoms.len .. this.compound.atoms.len + right.compound.atoms.len], right.compound.atoms);
|
||
return .{ .compound = .{
|
||
.atoms = atoms,
|
||
.brace_expansion_hint = this.compound.brace_expansion_hint or right.compound.brace_expansion_hint,
|
||
.glob_hint = this.compound.glob_hint or right.compound.glob_hint,
|
||
} };
|
||
}
|
||
|
||
if (this == .simple) {
|
||
var atoms = try allocator.alloc(SimpleAtom, 1 + right.compound.atoms.len);
|
||
atoms[0] = this.simple;
|
||
@memcpy(atoms[1 .. right.compound.atoms.len + 1], right.compound.atoms);
|
||
return .{ .compound = .{
|
||
.atoms = atoms,
|
||
.brace_expansion_hint = this.simple == .brace_begin or this.simple == .brace_end or right.compound.brace_expansion_hint,
|
||
.glob_hint = this.simple == .asterisk or this.simple == .double_asterisk or right.compound.glob_hint,
|
||
} };
|
||
}
|
||
|
||
var atoms = try allocator.alloc(SimpleAtom, 1 + this.compound.atoms.len);
|
||
@memcpy(atoms[0..this.compound.atoms.len], this.compound.atoms);
|
||
atoms[this.compound.atoms.len] = right.simple;
|
||
return .{ .compound = .{
|
||
.atoms = atoms,
|
||
.brace_expansion_hint = right.simple == .brace_begin or right.simple == .brace_end or this.compound.brace_expansion_hint,
|
||
.glob_hint = right.simple == .asterisk or right.simple == .double_asterisk or this.compound.glob_hint,
|
||
} };
|
||
}
|
||
|
||
pub fn atomsLen(this: *const Atom) u32 {
|
||
return switch (this.*) {
|
||
.simple => 1,
|
||
.compound => @intCast(this.compound.atoms.len),
|
||
};
|
||
}
|
||
|
||
pub fn new_simple(atom: SimpleAtom) Atom {
|
||
return .{ .simple = atom };
|
||
}
|
||
|
||
pub fn is_compound(self: *const Atom) bool {
|
||
switch (self.*) {
|
||
.compound => return true,
|
||
.simple => return false,
|
||
}
|
||
}
|
||
|
||
pub fn has_expansions(self: *const Atom) bool {
|
||
return self.has_glob_expansion() or self.has_brace_expansion();
|
||
}
|
||
|
||
pub fn has_glob_expansion(self: *const Atom) bool {
|
||
return switch (self.*) {
|
||
.simple => self.simple.glob_hint(),
|
||
.compound => self.compound.glob_hint,
|
||
};
|
||
}
|
||
|
||
pub fn has_brace_expansion(self: *const Atom) bool {
|
||
return switch (self.*) {
|
||
.simple => false,
|
||
.compound => self.compound.brace_expansion_hint,
|
||
};
|
||
}
|
||
|
||
pub fn hasTildeExpansion(self: *const Atom) bool {
|
||
return switch (self.*) {
|
||
.simple => self.simple == .tilde,
|
||
.compound => self.compound.atoms.len > 0 and self.compound.atoms[0] == .tilde,
|
||
};
|
||
}
|
||
};
|
||
|
||
pub const SimpleAtom = union(enum) {
|
||
Var: []const u8,
|
||
VarArgv: u8,
|
||
Text: []const u8,
|
||
asterisk,
|
||
double_asterisk,
|
||
brace_begin,
|
||
brace_end,
|
||
comma,
|
||
tilde,
|
||
cmd_subst: struct {
|
||
script: Script,
|
||
quoted: bool = false,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(@This());
|
||
cost += this.script.memoryCost();
|
||
return cost;
|
||
}
|
||
},
|
||
|
||
pub fn glob_hint(this: SimpleAtom) bool {
|
||
return switch (this) {
|
||
.Var => false,
|
||
.VarArgv => false,
|
||
.Text => false,
|
||
.asterisk => true,
|
||
.double_asterisk => true,
|
||
.brace_begin => false,
|
||
.brace_end => false,
|
||
.comma => false,
|
||
.cmd_subst => false,
|
||
.tilde => false,
|
||
};
|
||
}
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
return switch (this.*) {
|
||
.Var => this.Var.len,
|
||
.Text => this.Text.len,
|
||
.cmd_subst => this.cmd_subst.memoryCost(),
|
||
else => 0,
|
||
} + @sizeOf(SimpleAtom);
|
||
}
|
||
};
|
||
|
||
pub const CompoundAtom = struct {
|
||
atoms: []SimpleAtom,
|
||
brace_expansion_hint: bool = false,
|
||
glob_hint: bool = false,
|
||
|
||
pub fn memoryCost(this: *const @This()) usize {
|
||
var cost: usize = @sizeOf(CompoundAtom);
|
||
cost += this.#atomsMemoryCost();
|
||
return cost;
|
||
}
|
||
|
||
fn #atomsMemoryCost(this: *const @This()) usize {
|
||
var cost: usize = 0;
|
||
for (this.atoms) |*atom| {
|
||
cost += atom.memoryCost();
|
||
}
|
||
return cost;
|
||
}
|
||
};
|
||
};
|
||
|
||
pub const Parser = struct {
|
||
strpool: []const u8,
|
||
tokens: []const Token,
|
||
alloc: Allocator,
|
||
jsobjs: []JSValue,
|
||
current: u32 = 0,
|
||
errors: std.array_list.Managed(Error),
|
||
inside_subshell: ?SubshellKind = null,
|
||
|
||
const SubshellKind = enum {
|
||
cmd_subst,
|
||
normal,
|
||
pub fn closing_tok(this: SubshellKind) TokenTag {
|
||
return switch (this) {
|
||
.cmd_subst => TokenTag.CmdSubstEnd,
|
||
.normal => TokenTag.CloseParen,
|
||
};
|
||
}
|
||
};
|
||
|
||
// FIXME error location
|
||
const Error = struct { msg: []const u8 };
|
||
|
||
pub fn new(
|
||
allocator: Allocator,
|
||
lex_result: LexResult,
|
||
jsobjs: []JSValue,
|
||
) !Parser {
|
||
return .{
|
||
.strpool = lex_result.strpool,
|
||
.tokens = lex_result.tokens,
|
||
.alloc = allocator,
|
||
.jsobjs = jsobjs,
|
||
.errors = std.array_list.Managed(Error).init(allocator),
|
||
};
|
||
}
|
||
|
||
/// __WARNING__:
|
||
/// If you make a subparser and call some fallible functions on it, you need to catch the errors and call `.continue_from_subparser()`, otherwise errors
|
||
/// will not propagate upwards to the parent.
|
||
pub fn make_subparser(this: *Parser, kind: SubshellKind) Parser {
|
||
const subparser: Parser = .{
|
||
.strpool = this.strpool,
|
||
.tokens = this.tokens,
|
||
.alloc = this.alloc,
|
||
.jsobjs = this.jsobjs,
|
||
.current = this.current,
|
||
// We replace the old Parser's struct with the updated error list
|
||
// when this subparser is done
|
||
.errors = this.errors,
|
||
.inside_subshell = kind,
|
||
};
|
||
return subparser;
|
||
}
|
||
|
||
pub fn continue_from_subparser(this: *Parser, subparser: *Parser) void {
|
||
// this.current = if (this.tokens[subparser.current] == .Eof) subparser.current else subparser;
|
||
this.current =
|
||
if (subparser.current >= this.tokens.len) subparser.current else subparser.current + 1;
|
||
this.errors = subparser.errors;
|
||
}
|
||
|
||
/// Main parse function
|
||
///
|
||
/// Loosely based on the shell gramar documented in the spec: https://pubs.opengroup.org/onlinepubs/009604499/utilities/xcu_chap02.html#tag_02_10
|
||
pub fn parse(self: *Parser) !AST.Script {
|
||
return try self.parse_impl();
|
||
}
|
||
|
||
pub fn parse_impl(self: *Parser) !AST.Script {
|
||
var stmts = ArrayList(AST.Stmt).init(self.alloc);
|
||
if (self.tokens.len == 0 or self.tokens.len == 1 and self.tokens[0] == .Eof)
|
||
return .{ .stmts = stmts.items[0..stmts.items.len] };
|
||
|
||
while (if (self.inside_subshell == null)
|
||
!self.match(.Eof)
|
||
else
|
||
!self.match_any(&.{ .Eof, self.inside_subshell.?.closing_tok() }))
|
||
{
|
||
self.skip_newlines();
|
||
try stmts.append(try self.parse_stmt());
|
||
self.skip_newlines();
|
||
}
|
||
if (self.inside_subshell) |kind| {
|
||
_ = self.expect_any(&.{ .Eof, kind.closing_tok() });
|
||
} else {
|
||
_ = self.expect(.Eof);
|
||
}
|
||
return .{ .stmts = stmts.items[0..stmts.items.len] };
|
||
}
|
||
|
||
pub fn parse_stmt(self: *Parser) !AST.Stmt {
|
||
var exprs = std.array_list.Managed(AST.Expr).init(self.alloc);
|
||
|
||
while (if (self.inside_subshell == null)
|
||
!self.match_any_comptime(&.{ .Semicolon, .Newline, .Eof })
|
||
else
|
||
!self.match_any(&.{ .Semicolon, .Newline, .Eof, self.inside_subshell.?.closing_tok() }))
|
||
{
|
||
const expr = try self.parse_expr();
|
||
if (self.match(.Ampersand)) {
|
||
try self.add_error("Background commands \"&\" are not supported yet.", .{});
|
||
return ParseError.Unsupported;
|
||
// Uncomment when we enable ampersand
|
||
// switch (expr) {
|
||
// .binary => {
|
||
// var newexpr = expr;
|
||
// const right_alloc = try self.allocate(AST.Expr, newexpr.binary.right);
|
||
// const right: AST.Expr = .{ .@"async" = right_alloc };
|
||
// newexpr.binary.right = right;
|
||
// try exprs.append(newexpr);
|
||
// },
|
||
// else => {
|
||
// const @"async" = .{ .@"async" = try self.allocate(AST.Expr, expr) };
|
||
// try exprs.append(@"async");
|
||
// },
|
||
// }
|
||
|
||
// _ = self.match_any_comptime(&.{ .Semicolon, .Newline });
|
||
|
||
// // Scripts like: `echo foo & && echo hi` aren't allowed because
|
||
// // `&&` and `||` require the left-hand side's exit code to be
|
||
// // immediately observable, but the `&` makes it run in the
|
||
// // background.
|
||
// //
|
||
// // So we do a quick check for this kind of syntax here, and
|
||
// // provide a helpful error message to the user.
|
||
// if (self.peek() == .DoubleAmpersand) {
|
||
// try self.add_error("\"&\" is not allowed on the left-hand side of \"&&\"", .{});
|
||
// return ParseError.Unsupported;
|
||
// }
|
||
|
||
// break;
|
||
}
|
||
try exprs.append(expr);
|
||
|
||
// This might be necessary, so leaving it here in case it is
|
||
// switch (self.peek()) {
|
||
// .Eof, .Newline, .Semicolon => {},
|
||
// else => |t| {
|
||
// if (self.inside_subshell == null or self.inside_subshell.?.closing_tok() != t) {
|
||
// @panic("Oh no!");
|
||
// }
|
||
// },
|
||
// }
|
||
}
|
||
|
||
return .{
|
||
.exprs = exprs.items[0..],
|
||
};
|
||
}
|
||
|
||
fn parse_expr(self: *Parser) !AST.Expr {
|
||
return try self.parse_binary();
|
||
}
|
||
|
||
fn parse_binary(self: *Parser) !AST.Expr {
|
||
var left = try self.parse_pipeline();
|
||
while (self.match_any_comptime(&.{ .DoubleAmpersand, .DoublePipe })) {
|
||
const op: AST.Binary.Op = op: {
|
||
const previous = @as(TokenTag, self.prev());
|
||
switch (previous) {
|
||
.DoubleAmpersand => break :op .And,
|
||
.DoublePipe => break :op .Or,
|
||
else => unreachable,
|
||
}
|
||
};
|
||
|
||
const right = try self.parse_pipeline();
|
||
|
||
const binary = try self.allocate(AST.Binary, .{ .op = op, .left = left, .right = right });
|
||
left = .{ .binary = binary };
|
||
}
|
||
|
||
return left;
|
||
}
|
||
|
||
fn parse_pipeline(self: *Parser) !AST.Expr {
|
||
var expr = try self.parse_compound_cmd();
|
||
|
||
if (self.peek() == .Pipe) {
|
||
var pipeline_items = std.array_list.Managed(AST.PipelineItem).init(self.alloc);
|
||
try pipeline_items.append(expr.asPipelineItem() orelse {
|
||
try self.add_error_expected_pipeline_item(@as(AST.Expr.Tag, expr));
|
||
return ParseError.Expected;
|
||
});
|
||
|
||
while (self.match(.Pipe)) {
|
||
expr = try self.parse_compound_cmd();
|
||
try pipeline_items.append(expr.asPipelineItem() orelse {
|
||
try self.add_error_expected_pipeline_item(@as(AST.Expr.Tag, expr));
|
||
return ParseError.Expected;
|
||
});
|
||
}
|
||
const pipeline = try self.allocate(AST.Pipeline, .{ .items = pipeline_items.items[0..] });
|
||
return .{ .pipeline = pipeline };
|
||
}
|
||
|
||
return expr;
|
||
}
|
||
|
||
fn extractIfClauseTextToken(comptime if_clause_token: @TypeOf(.enum_literal)) []const u8 {
|
||
const tagname = comptime switch (if_clause_token) {
|
||
.@"if" => "if",
|
||
.@"else" => "else",
|
||
.elif => "elif",
|
||
.then => "then",
|
||
.fi => "fi",
|
||
else => @compileError("Invalid " ++ @tagName(if_clause_token)),
|
||
};
|
||
return tagname;
|
||
}
|
||
|
||
fn expectIfClauseTextToken(self: *Parser, comptime if_clause_token: @TypeOf(.enum_literal)) Token {
|
||
const tagname = comptime extractIfClauseTextToken(if_clause_token);
|
||
if (bun.Environment.allow_assert) assert(@as(TokenTag, self.peek()) == .Text);
|
||
if (self.peek() == .Text and
|
||
self.delimits(self.peek_n(1)) and
|
||
std.mem.eql(u8, self.text(self.peek().Text), tagname))
|
||
{
|
||
const tok = self.advance();
|
||
_ = self.expect_delimit();
|
||
return tok;
|
||
}
|
||
@panic("Expected: " ++ @tagName(if_clause_token));
|
||
}
|
||
|
||
fn isIfClauseTextToken(self: *Parser, comptime if_clause_token: @TypeOf(.enum_literal)) bool {
|
||
return switch (self.peek()) {
|
||
.Text => |range| self.isIfClauseTextTokenImpl(range, if_clause_token),
|
||
else => false,
|
||
};
|
||
}
|
||
|
||
fn isIfClauseTextTokenImpl(self: *Parser, range: Token.TextRange, comptime if_clause_token: @TypeOf(.enum_literal)) bool {
|
||
const tagname = comptime extractIfClauseTextToken(if_clause_token);
|
||
return bun.strings.eqlComptime(self.text(range), tagname);
|
||
}
|
||
|
||
fn skip_newlines(self: *Parser) void {
|
||
while (self.match(.Newline)) {}
|
||
}
|
||
|
||
fn parse_compound_cmd(self: *Parser) anyerror!AST.Expr {
|
||
// Placeholder for when we fully support subshells
|
||
if (self.peek() == .OpenParen) {
|
||
const subshell = try self.parse_subshell();
|
||
if (!subshell.redirect_flags.isEmpty()) {
|
||
try self.add_error("Subshells with redirections are currently not supported. Please open a GitHub issue.", .{});
|
||
return ParseError.Unsupported;
|
||
}
|
||
|
||
return .{
|
||
.subshell = try self.allocate(AST.Subshell, subshell),
|
||
};
|
||
}
|
||
|
||
if (self.isIfClauseTextToken(.@"if")) return (try self.parse_if_clause()).to_expr(self.alloc);
|
||
|
||
switch (self.peek()) {
|
||
.DoubleBracketOpen => return (try self.parse_cond_expr()).to_expr(self.alloc),
|
||
else => {},
|
||
}
|
||
|
||
return (try self.parse_simple_cmd()).to_expr(self.alloc);
|
||
}
|
||
|
||
fn parse_subshell(self: *Parser) !AST.Subshell {
|
||
_ = self.expect(.OpenParen);
|
||
var subparser = self.make_subparser(.normal);
|
||
const script = subparser.parse_impl() catch |e| {
|
||
self.continue_from_subparser(&subparser);
|
||
return e;
|
||
};
|
||
self.continue_from_subparser(&subparser);
|
||
const parsed_redirect = try self.parse_redirect();
|
||
|
||
return .{
|
||
.script = script,
|
||
.redirect = parsed_redirect.redirect,
|
||
.redirect_flags = parsed_redirect.flags,
|
||
};
|
||
}
|
||
|
||
fn parse_cond_expr(self: *Parser) !AST.CondExpr {
|
||
_ = self.expect(.DoubleBracketOpen);
|
||
|
||
// Quick check to see if it's a single operand operator
|
||
// Operators are not allowed to be expanded (i.e. `FOO=-f; [[ $FOO package.json ]]` won't work)
|
||
// So it must be a .Text token
|
||
// Also, all single operand operators start with "-", so check it starts with "-".
|
||
switch (self.peek()) {
|
||
.Text => |range| {
|
||
const txt = self.text(range);
|
||
|
||
if (txt[0] == '-') {
|
||
// Is a potential single arg op
|
||
inline for (AST.CondExpr.Op.SINGLE_ARG_OPS) |single_arg_op| {
|
||
if (bun.strings.eqlComptime(txt, single_arg_op.name)) {
|
||
const is_supported = comptime AST.CondExpr.Op.isSupported(@enumFromInt(single_arg_op.value));
|
||
if (!is_supported) {
|
||
try self.add_error("Conditional expression operation: {s}, is not supported right now. Please open a GitHub issue if you would like it to be supported.", .{single_arg_op.name});
|
||
return ParseError.Unsupported;
|
||
}
|
||
|
||
_ = self.expect(.Text);
|
||
if (!self.match(.Delimit)) {
|
||
try self.add_error("Expected a single, simple word", .{});
|
||
return ParseError.Expected;
|
||
}
|
||
|
||
const arg = try self.parse_atom() orelse {
|
||
try self.add_error("Expected a word, but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
};
|
||
|
||
if (!self.match(.DoubleBracketClose)) {
|
||
try self.add_error("Expected \"]]\" but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
}
|
||
|
||
return .{
|
||
.op = @enumFromInt(single_arg_op.value),
|
||
.args = AST.CondExpr.ArgList.initWith(arg),
|
||
};
|
||
}
|
||
}
|
||
|
||
try self.add_error("Unknown conditional expression operation: {s}", .{txt});
|
||
return ParseError.Unknown;
|
||
}
|
||
},
|
||
else => {},
|
||
}
|
||
|
||
// Otherwise check binary operators like:
|
||
// arg1 -eq arg2
|
||
// Again the token associated with the operator (in this case `-eq`) *must* be a .Text token.
|
||
|
||
const arg1 = try self.parse_atom() orelse {
|
||
try self.add_error("Expected a conditional expression operand, but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
};
|
||
|
||
// Operator must be a regular text token
|
||
if (self.peek() != .Text) {
|
||
try self.add_error("Expected a conditional expression operator, but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
}
|
||
|
||
const op = self.expect(.Text);
|
||
if (!self.match(.Delimit)) {
|
||
try self.add_error("Expected a single, simple word", .{});
|
||
return ParseError.Expected;
|
||
}
|
||
const txt = self.text(op.Text);
|
||
|
||
inline for (AST.CondExpr.Op.BINARY_OPS) |binary_op| {
|
||
if (bun.strings.eqlComptime(txt, binary_op.name)) {
|
||
const is_supported = comptime AST.CondExpr.Op.isSupported(@enumFromInt(binary_op.value));
|
||
if (!is_supported) {
|
||
try self.add_error("Conditional expression operation: {s}, is not supported right now. Please open a GitHub issue if you would like it to be supported.", .{binary_op.name});
|
||
return ParseError.Unsupported;
|
||
}
|
||
|
||
const arg2 = try self.parse_atom() orelse {
|
||
try self.add_error("Expected a word, but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
};
|
||
|
||
if (!self.match(.DoubleBracketClose)) {
|
||
try self.add_error("Expected \"]]\" but got: {s}", .{self.peek().asHumanReadable(self.strpool)});
|
||
return ParseError.Expected;
|
||
}
|
||
|
||
return .{
|
||
.op = @enumFromInt(binary_op.value),
|
||
.args = AST.CondExpr.ArgList.initWithSlice(&.{ arg1, arg2 }),
|
||
};
|
||
}
|
||
}
|
||
|
||
try self.add_error("Unknown conditional expression operation: {s}", .{txt});
|
||
return ParseError.Unknown;
|
||
}
|
||
|
||
/// We make it so that `if`/`else`/`elif`/`then`/`fi` need to be single,
|
||
/// simple .Text tokens (so the whitespace logic remains the same).
|
||
/// This is used to convert them
|
||
const IfClauseTok = enum {
|
||
@"if",
|
||
@"else",
|
||
elif,
|
||
then,
|
||
fi,
|
||
|
||
pub fn fromTok(p: *Parser, tok: Token) ?IfClauseTok {
|
||
return switch (tok) {
|
||
.Text => fromText(p.text(tok.Text)),
|
||
else => null,
|
||
};
|
||
}
|
||
|
||
pub fn fromText(txt: []const u8) ?IfClauseTok {
|
||
if (bun.strings.eqlComptime(txt, "if")) return .@"if";
|
||
if (bun.strings.eqlComptime(txt, "else")) return .@"else";
|
||
if (bun.strings.eqlComptime(txt, "elif")) return .elif;
|
||
if (bun.strings.eqlComptime(txt, "then")) return .then;
|
||
if (bun.strings.eqlComptime(txt, "fi")) return .fi;
|
||
|
||
return null;
|
||
}
|
||
};
|
||
|
||
fn parse_if_body(self: *Parser, comptime until: []const IfClauseTok) !SmolList(AST.Stmt, 1) {
|
||
var ret: SmolList(AST.Stmt, 1) = SmolList(AST.Stmt, 1).zeroes;
|
||
while (if (self.inside_subshell == null)
|
||
!self.peek_any_comptime_ifclausetok(until) and !self.peek_any_comptime(&.{.Eof})
|
||
else
|
||
!self.peek_any_ifclausetok(until) and !self.peek_any(&.{ self.inside_subshell.?.closing_tok(), .Eof }))
|
||
{
|
||
self.skip_newlines();
|
||
const stmt = try self.parse_stmt();
|
||
ret.append(stmt);
|
||
self.skip_newlines();
|
||
}
|
||
|
||
return ret;
|
||
}
|
||
|
||
fn parse_if_clause(self: *Parser) !AST.If {
|
||
_ = self.expectIfClauseTextToken(.@"if");
|
||
// _ = self.expect(.If);
|
||
|
||
const cond = try self.parse_if_body(&.{.then});
|
||
|
||
if (!self.match_if_clausetok(.then)) {
|
||
try self.add_error("Expected \"then\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
}
|
||
|
||
const then = try self.parse_if_body(&.{ .@"else", .elif, .fi });
|
||
|
||
var else_parts: SmolList(SmolList(AST.Stmt, 1), 1) = SmolList(SmolList(AST.Stmt, 1), 1).zeroes;
|
||
|
||
const if_clause_tok = IfClauseTok.fromTok(self, self.peek()) orelse {
|
||
try self.add_error("Expected \"else\", \"elif\", or \"fi\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
};
|
||
|
||
switch (if_clause_tok) {
|
||
.@"if", .then => {
|
||
try self.add_error("Expected \"else\", \"elif\", or \"fi\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
},
|
||
.@"else" => {
|
||
_ = self.expectIfClauseTextToken(.@"else");
|
||
const @"else" = try self.parse_if_body(&.{.fi});
|
||
if (!self.match_if_clausetok(.fi)) {
|
||
try self.add_error("Expected \"fi\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
}
|
||
else_parts.append(@"else");
|
||
return .{
|
||
.cond = cond,
|
||
.then = then,
|
||
.else_parts = else_parts,
|
||
};
|
||
},
|
||
.elif => {
|
||
while (true) {
|
||
_ = self.expectIfClauseTextToken(.elif);
|
||
const elif_cond = try self.parse_if_body(&.{.then});
|
||
if (!self.match_if_clausetok(.then)) {
|
||
try self.add_error("Expected \"then\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
}
|
||
const then_part = try self.parse_if_body(&.{ .elif, .@"else", .fi });
|
||
else_parts.append(elif_cond);
|
||
else_parts.append(then_part);
|
||
|
||
switch (IfClauseTok.fromTok(self, self.peek()) orelse {
|
||
break;
|
||
}) {
|
||
.elif => continue,
|
||
.@"else" => {
|
||
_ = self.expectIfClauseTextToken(.@"else");
|
||
const else_part = try self.parse_if_body(&.{.fi});
|
||
else_parts.append(else_part);
|
||
break;
|
||
},
|
||
else => break,
|
||
}
|
||
}
|
||
if (!self.match_if_clausetok(.fi)) {
|
||
try self.add_error("Expected \"fi\" but got: {s}", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
}
|
||
return .{
|
||
.cond = cond,
|
||
.then = then,
|
||
.else_parts = else_parts,
|
||
};
|
||
},
|
||
.fi => {
|
||
_ = self.expectIfClauseTextToken(.fi);
|
||
return .{
|
||
.cond = cond,
|
||
.then = then,
|
||
};
|
||
},
|
||
}
|
||
}
|
||
|
||
fn parse_simple_cmd(self: *Parser) !AST.CmdOrAssigns {
|
||
var assigns = std.array_list.Managed(AST.Assign).init(self.alloc);
|
||
while (if (self.inside_subshell == null)
|
||
!self.check_any_comptime(&.{ .Semicolon, .Newline, .Eof })
|
||
else
|
||
!self.check_any(&.{ .Semicolon, .Newline, .Eof, self.inside_subshell.?.closing_tok() }))
|
||
{
|
||
if (try self.parse_assign()) |assign| {
|
||
try assigns.append(assign);
|
||
} else {
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (if (self.inside_subshell == null)
|
||
self.check_any_comptime(&.{ .Semicolon, .Newline, .Eof })
|
||
else
|
||
self.check_any(&.{ .Semicolon, .Newline, .Eof, self.inside_subshell.?.closing_tok() }))
|
||
{
|
||
if (assigns.items.len == 0) {
|
||
try self.add_error("expected a command or assignment", .{});
|
||
return ParseError.Expected;
|
||
}
|
||
return .{ .assigns = assigns.items[0..] };
|
||
}
|
||
|
||
const name = try self.parse_atom() orelse {
|
||
if (assigns.items.len == 0) {
|
||
try self.add_error("expected a command or assignment but got: \"{s}\"", .{@tagName(self.peek())});
|
||
return ParseError.Expected;
|
||
}
|
||
return .{ .assigns = assigns.items[0..] };
|
||
};
|
||
|
||
var name_and_args = std.array_list.Managed(AST.Atom).init(self.alloc);
|
||
try name_and_args.append(name);
|
||
while (try self.parse_atom()) |arg| {
|
||
try name_and_args.append(arg);
|
||
}
|
||
const parsed_redirect = try self.parse_redirect();
|
||
|
||
return .{ .cmd = .{
|
||
.assigns = assigns.items[0..],
|
||
.name_and_args = name_and_args.items[0..],
|
||
.redirect_file = parsed_redirect.redirect,
|
||
.redirect = parsed_redirect.flags,
|
||
} };
|
||
}
|
||
|
||
fn parse_redirect(self: *Parser) !ParsedRedirect {
|
||
const has_redirect = self.match(.Redirect);
|
||
const redirect = if (has_redirect) self.prev().Redirect else AST.RedirectFlags{};
|
||
const redirect_file: ?AST.Redirect = redirect_file: {
|
||
if (has_redirect) {
|
||
if (self.match(.JSObjRef)) {
|
||
const obj_ref = self.prev().JSObjRef;
|
||
break :redirect_file .{ .jsbuf = AST.JSBuf.new(obj_ref) };
|
||
}
|
||
|
||
const redirect_file = try self.parse_atom() orelse {
|
||
if (redirect.duplicate_out) break :redirect_file null;
|
||
try self.add_error("Redirection with no file", .{});
|
||
return ParseError.Expected;
|
||
};
|
||
break :redirect_file .{ .atom = redirect_file };
|
||
}
|
||
break :redirect_file null;
|
||
};
|
||
// TODO check for multiple redirects and error
|
||
return .{ .flags = redirect, .redirect = redirect_file };
|
||
}
|
||
|
||
const ParsedRedirect = struct {
|
||
flags: AST.RedirectFlags = .{},
|
||
redirect: ?AST.Redirect = null,
|
||
};
|
||
|
||
/// Try to parse an assignment. If no assignment could be parsed then return
|
||
/// null and backtrack the parser state
|
||
fn parse_assign(self: *Parser) !?AST.Assign {
|
||
const old = self.current;
|
||
_ = old;
|
||
switch (self.peek()) {
|
||
.Text => |txtrng| {
|
||
const start_idx = self.current;
|
||
_ = self.expect(.Text);
|
||
const txt = self.text(txtrng);
|
||
const var_decl: ?AST.Assign = var_decl: {
|
||
if (hasEqSign(txt)) |eq_idx| {
|
||
// If it starts with = then it's not valid assignment (e.g. `=FOO`)
|
||
if (eq_idx == 0) break :var_decl null;
|
||
const label = txt[0..eq_idx];
|
||
if (!isValidVarName(label)) {
|
||
break :var_decl null;
|
||
}
|
||
|
||
if (eq_idx == txt.len - 1) {
|
||
if (self.delimits(self.peek())) {
|
||
_ = self.expect_delimit();
|
||
break :var_decl .{
|
||
.label = label,
|
||
.value = .{ .simple = .{ .Text = "" } },
|
||
};
|
||
}
|
||
const atom = try self.parse_atom() orelse {
|
||
try self.add_error("Expected an atom", .{});
|
||
return ParseError.Expected;
|
||
};
|
||
break :var_decl .{
|
||
.label = label,
|
||
.value = atom,
|
||
};
|
||
}
|
||
|
||
const txt_value = txt[eq_idx + 1 .. txt.len];
|
||
if (self.delimits(self.peek())) {
|
||
_ = self.expect_delimit();
|
||
break :var_decl .{
|
||
.label = label,
|
||
.value = .{ .simple = .{ .Text = txt_value } },
|
||
};
|
||
}
|
||
|
||
const right = try self.parse_atom() orelse {
|
||
try self.add_error("Expected an atom", .{});
|
||
return ParseError.Expected;
|
||
};
|
||
const left: AST.Atom = .{
|
||
.simple = .{ .Text = txt_value },
|
||
};
|
||
const merged = try AST.Atom.merge(left, right, self.alloc);
|
||
break :var_decl .{
|
||
.label = label,
|
||
.value = merged,
|
||
};
|
||
}
|
||
break :var_decl null;
|
||
};
|
||
|
||
if (var_decl) |vd| {
|
||
return vd;
|
||
}
|
||
|
||
// Rollback
|
||
self.current = start_idx;
|
||
return null;
|
||
},
|
||
else => return null,
|
||
}
|
||
}
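// Example of the backtracking above (illustrative): `FOO=bar` followed by a delimiter
// becomes `.{ .label = "FOO", .value = .{ .simple = .{ .Text = "bar" } } }`;
// `FOO=bar$BAZ` merges the literal "bar" with the atom parsed for `$BAZ`; and a bare
// `FOO` (no `=`) resets `self.current` to `start_idx` and returns null so the same
// token can be re-parsed as a command name.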
|
||
|
||
fn parse_atom(self: *Parser) !?AST.Atom {
|
||
var array_alloc = std.heap.stackFallback(@sizeOf(AST.SimpleAtom), self.alloc);
|
||
var atoms = try std.array_list.Managed(AST.SimpleAtom).initCapacity(array_alloc.get(), 1);
|
||
var has_brace_open = false;
|
||
var has_brace_close = false;
|
||
var has_comma = false;
|
||
var has_glob_syntax = false;
|
||
{
|
||
while (switch (self.peek()) {
|
||
.Delimit => brk: {
|
||
_ = self.expect(.Delimit);
|
||
break :brk false;
|
||
},
|
||
.Eof, .Semicolon, .Newline => false,
|
||
else => |t| brk: {
|
||
if (self.inside_subshell != null and self.inside_subshell.?.closing_tok() == t) break :brk false;
|
||
break :brk true;
|
||
},
|
||
}) {
|
||
const next = self.peek_n(1);
|
||
const next_delimits = self.delimits(next);
|
||
const peeked = self.peek();
|
||
const should_break = next_delimits;
|
||
switch (peeked) {
|
||
.Asterisk => {
|
||
has_glob_syntax = true;
|
||
_ = self.expect(.Asterisk);
|
||
try atoms.append(.asterisk);
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
break;
|
||
}
|
||
},
|
||
.DoubleAsterisk => {
|
||
has_glob_syntax = true;
|
||
_ = self.expect(.DoubleAsterisk);
|
||
try atoms.append(.double_asterisk);
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
break;
|
||
}
|
||
},
|
||
.BraceBegin => {
|
||
has_brace_open = true;
|
||
_ = self.expect(.BraceBegin);
|
||
try atoms.append(.brace_begin);
|
||
// TODO in this case we know it can't possibly be the beginning of a brace expansion so maybe it's faster to just change it to text here
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
if (should_break) break;
|
||
}
|
||
},
|
||
.BraceEnd => {
|
||
has_brace_close = true;
|
||
_ = self.expect(.BraceEnd);
|
||
try atoms.append(.brace_end);
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
break;
|
||
}
|
||
},
|
||
.Comma => {
|
||
has_comma = true;
|
||
_ = self.expect(.Comma);
|
||
try atoms.append(.comma);
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
if (should_break) break;
|
||
}
|
||
},
|
||
.CmdSubstBegin => {
|
||
_ = self.expect(.CmdSubstBegin);
|
||
const is_quoted = self.match(.CmdSubstQuoted);
|
||
var subparser = self.make_subparser(.cmd_subst);
|
||
const script = subparser.parse_impl() catch |e| {
|
||
self.continue_from_subparser(&subparser);
|
||
return e;
|
||
};
|
||
try atoms.append(.{ .cmd_subst = .{
|
||
.script = script,
|
||
.quoted = is_quoted,
|
||
} });
|
||
self.continue_from_subparser(&subparser);
|
||
if (self.delimits(self.peek())) {
|
||
_ = self.match(.Delimit);
|
||
break;
|
||
}
|
||
},
|
||
.SingleQuotedText, .DoubleQuotedText, .Text => |txtrng| {
|
||
_ = self.advance();
|
||
var txt = self.text(txtrng);
|
||
if (peeked == .Text and txt.len > 0 and txt[0] == '~') {
|
||
txt = txt[1..];
|
||
try atoms.append(.tilde);
|
||
if (txt.len > 0) {
|
||
try atoms.append(.{ .Text = txt });
|
||
}
|
||
} else {
|
||
try atoms.append(.{ .Text = txt });
|
||
}
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
if (should_break) break;
|
||
}
|
||
},
|
||
.Var => |txtrng| {
|
||
_ = self.expect(.Var);
|
||
const txt = self.text(txtrng);
|
||
try atoms.append(.{ .Var = txt });
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
if (should_break) break;
|
||
}
|
||
},
|
||
.VarArgv => |int| {
|
||
_ = self.expect(.VarArgv);
|
||
try atoms.append(.{ .VarArgv = int });
|
||
if (next_delimits) {
|
||
_ = self.match(.Delimit);
|
||
if (should_break) break;
|
||
}
|
||
},
|
||
.OpenParen, .CloseParen => {
|
||
try self.add_error("Unexpected token: `{s}`", .{if (peeked == .OpenParen) "(" else ")"});
|
||
return ParseError.Unexpected;
|
||
},
|
||
.Pipe => return null,
|
||
.DoublePipe => return null,
|
||
.Ampersand => return null,
|
||
.DoubleAmpersand => return null,
|
||
.Redirect => return null,
|
||
.Dollar => return null,
|
||
.Eq => return null,
|
||
.Semicolon => return null,
|
||
.Newline => return null,
|
||
.CmdSubstQuoted => return null,
|
||
.CmdSubstEnd => return null,
|
||
.JSObjRef => return null,
|
||
.Delimit => return null,
|
||
.Eof => return null,
|
||
.DoubleBracketOpen => return null,
|
||
.DoubleBracketClose => return null,
|
||
}
|
||
}
|
||
}
|
||
|
||
return switch (atoms.items.len) {
|
||
0 => null,
|
||
1 => {
|
||
if (bun.Environment.allow_assert) assert(atoms.capacity == 1);
|
||
return AST.Atom.new_simple(atoms.items[0]);
|
||
},
|
||
else => .{ .compound = .{
|
||
.atoms = atoms.items[0..atoms.items.len],
|
||
.brace_expansion_hint = has_brace_open and has_brace_close and has_comma,
|
||
.glob_hint = has_glob_syntax,
|
||
} },
|
||
};
|
||
}
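// Illustrative sketch of the hint flags above: an argument like `foo{a,b}*` produces a
// compound atom with `brace_expansion_hint = true` (it saw `{`, `,`, and `}`) and
// `glob_hint = true` (it saw `*`), while a plain `foo` stays a single simple atom,
// which is why the one-element fast path can assert the stack-fallback capacity of 1.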
|
||
|
||
fn allocate(self: *const Parser, comptime T: type, val: T) !*T {
|
||
const heap = try self.alloc.create(T);
|
||
heap.* = val;
|
||
return heap;
|
||
}
|
||
|
||
fn text(self: *const Parser, range: Token.TextRange) []const u8 {
|
||
return self.strpool[range.start..range.end];
|
||
}
|
||
|
||
fn advance(self: *Parser) Token {
|
||
if (!self.is_at_end()) {
|
||
self.current += 1;
|
||
}
|
||
return self.prev();
|
||
}
|
||
|
||
fn is_at_end(self: *Parser) bool {
|
||
return self.peek() == .Eof or self.inside_subshell != null and self.inside_subshell.?.closing_tok() == self.peek();
|
||
}
|
||
|
||
fn expect(self: *Parser, toktag: TokenTag) Token {
|
||
if (bun.Environment.allow_assert) assert(toktag == @as(TokenTag, self.peek()));
|
||
if (self.check(toktag)) {
|
||
return self.advance();
|
||
}
|
||
@panic("Unexpected token");
|
||
}
|
||
|
||
fn expect_any(self: *Parser, toktags: []const TokenTag) Token {
|
||
const peeked = self.peek();
|
||
for (toktags) |toktag| {
|
||
if (toktag == @as(TokenTag, peeked)) return self.advance();
|
||
}
|
||
|
||
@panic("Unexpected token");
|
||
}
|
||
|
||
fn delimits(self: *Parser, tok: Token) bool {
|
||
return tok == .Delimit or tok == .Semicolon or tok == .Eof or tok == .Newline or (self.inside_subshell != null and tok == self.inside_subshell.?.closing_tok());
|
||
}
|
||
|
||
fn expect_delimit(self: *Parser) Token {
|
||
if (bun.Environment.allow_assert) assert(self.delimits(self.peek()));
|
||
if (self.check(.Delimit) or self.check(.Semicolon) or self.check(.Newline) or self.check(.Eof) or (self.inside_subshell != null and self.check(self.inside_subshell.?.closing_tok()))) {
|
||
return self.advance();
|
||
}
|
||
@panic("Expected a delimiter token");
|
||
}
|
||
|
||
fn match_if_clausetok(self: *Parser, toktag: IfClauseTok) bool {
|
||
if (self.peek() == .Text and
|
||
self.delimits(self.peek_n(1)) and
|
||
bun.strings.eql(self.text(self.peek().Text), @tagName(toktag)))
|
||
{
|
||
_ = self.advance();
|
||
_ = self.expect_delimit();
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
/// Consumes token if it matches
|
||
fn match(self: *Parser, toktag: TokenTag) bool {
|
||
if (@as(TokenTag, self.peek()) == toktag) {
|
||
_ = self.advance();
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn match_any_comptime(self: *Parser, comptime toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
inline for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
_ = self.advance();
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn match_any(self: *Parser, toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
_ = self.advance();
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn peek_any_ifclausetok(self: *Parser, toktags: []const IfClauseTok) bool {
|
||
const peektok = self.peek();
|
||
const peeked = @as(TokenTag, peektok);
|
||
if (peeked != .Text) return false;
|
||
|
||
const txt = self.text(peektok.Text);
|
||
for (toktags) |tag| {
|
||
if (bun.strings.eql(txt, @tagName(tag))) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn peek_any_comptime_ifclausetok(self: *Parser, comptime toktags: []const IfClauseTok) bool {
|
||
const peektok = self.peek();
|
||
const peeked = @as(TokenTag, peektok);
|
||
if (peeked != .Text) return false;
|
||
|
||
const txt = self.text(peektok.Text);
|
||
inline for (toktags) |tag| {
|
||
if (bun.strings.eqlComptime(txt, @tagName(tag))) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn peek_any_comptime(self: *Parser, comptime toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
inline for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn peek_any(self: *Parser, toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn check_any_comptime(self: *Parser, comptime toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
inline for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn check_any(self: *Parser, toktags: []const TokenTag) bool {
|
||
const peeked = @as(TokenTag, self.peek());
|
||
for (toktags) |tag| {
|
||
if (peeked == tag) {
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
fn check(self: *Parser, toktag: TokenTag) bool {
|
||
return @as(TokenTag, self.peek()) == @as(TokenTag, toktag);
|
||
}
|
||
|
||
fn peek(self: *Parser) Token {
|
||
return self.tokens[self.current];
|
||
}
|
||
|
||
fn peek_n(self: *Parser, n: u32) Token {
|
||
if (self.current + n >= self.tokens.len) {
|
||
return self.tokens[self.tokens.len - 1];
|
||
}
|
||
|
||
return self.tokens[self.current + n];
|
||
}
|
||
|
||
fn prev(self: *Parser) Token {
|
||
return self.tokens[self.current - 1];
|
||
}
|
||
|
||
pub fn combineErrors(self: *Parser) []const u8 {
|
||
const errors = self.errors.items[0..];
|
||
const str = str: {
|
||
const size = size: {
|
||
var i: usize = 0;
|
||
for (errors) |e| {
|
||
i += e.msg.len;
|
||
}
|
||
break :size i;
|
||
};
|
||
var buf = bun.handleOom(self.alloc.alloc(u8, size));
|
||
var i: usize = 0;
|
||
for (errors) |e| {
|
||
@memcpy(buf[i .. i + e.msg.len], e.msg);
|
||
i += e.msg.len;
|
||
}
|
||
break :str buf;
|
||
};
|
||
return str;
|
||
}
|
||
|
||
fn add_error(self: *Parser, comptime fmt: []const u8, args: anytype) !void {
|
||
const error_msg = try std.fmt.allocPrint(self.alloc, fmt, args);
|
||
try self.errors.append(.{ .msg = error_msg });
|
||
}
|
||
|
||
fn add_error_expected_pipeline_item(self: *Parser, kind: AST.Expr.Tag) !void {
|
||
const error_msg = try std.fmt.allocPrint(self.alloc, "Expected a command, assignment, or subshell but got: {s}", .{@tagName(kind)});
|
||
try self.errors.append(.{ .msg = error_msg });
|
||
}
|
||
};
|
||
|
||
pub const TokenTag = enum {
    Pipe,
    DoublePipe,
    Ampersand,
    DoubleAmpersand,
    Redirect,
    Dollar,
    Asterisk,
    DoubleAsterisk,
    Eq,
    Semicolon,
    Newline,
    // Comment,
    BraceBegin,
    Comma,
    BraceEnd,
    CmdSubstBegin,
    CmdSubstQuoted,
    CmdSubstEnd,
    OpenParen,
    CloseParen,
    Var,
    VarArgv,
    Text,
    SingleQuotedText,
    DoubleQuotedText,
    JSObjRef,
    DoubleBracketOpen,
    DoubleBracketClose,
    Delimit,
    Eof,
};
|
||
|
||
pub const Token = union(TokenTag) {
|
||
/// |
|
||
Pipe,
|
||
/// ||
|
||
DoublePipe,
|
||
/// &
|
||
Ampersand,
|
||
/// &&
|
||
DoubleAmpersand,
|
||
|
||
Redirect: AST.RedirectFlags,
|
||
|
||
/// $
|
||
Dollar,
|
||
// `*`
|
||
Asterisk,
|
||
DoubleAsterisk,
|
||
|
||
/// =
|
||
Eq,
|
||
/// ;
|
||
Semicolon,
|
||
/// \n (unescaped newline)
|
||
Newline,
|
||
|
||
BraceBegin,
|
||
Comma,
|
||
BraceEnd,
|
||
CmdSubstBegin,
|
||
/// When cmd subst is wrapped in quotes, it should be interpreted as a literal string, not word-split into arguments to a cmd.
|
||
/// We lose quotation context in the AST, so we don't know how to disambiguate that.
|
||
/// So this is a quick hack to give the AST that context.
|
||
///
|
||
/// This matches this shell behaviour:
|
||
/// echo test$(echo "1 2") -> test1 2\n
|
||
/// echo "test$(echo "1 2")" -> test1 2\n
|
||
CmdSubstQuoted,
|
||
CmdSubstEnd,
|
||
OpenParen,
|
||
CloseParen,
|
||
|
||
Var: TextRange,
|
||
VarArgv: u8,
|
||
Text: TextRange,
|
||
/// Quotation information is lost from the lexer -> parser stage and it is
|
||
/// helpful to disambiguate between regular text and quoted text
|
||
SingleQuotedText: TextRange,
|
||
DoubleQuotedText: TextRange,
|
||
JSObjRef: u32,
|
||
|
||
DoubleBracketOpen,
|
||
DoubleBracketClose,
|
||
|
||
Delimit,
|
||
Eof,
|
||
|
||
pub const TextRange = struct {
    start: u32,
    end: u32,

    pub fn len(range: TextRange) u32 {
        if (bun.Environment.allow_assert) assert(range.start <= range.end);
        return range.end - range.start;
    }

    pub fn slice(range: TextRange, buf: []const u8) []const u8 {
        return buf[range.start..range.end];
    }
};
|
||
|
||
pub fn asHumanReadable(self: Token, strpool: []const u8) []const u8 {
|
||
const varargv_strings = blk: {
|
||
var res: [10][2]u8 = undefined;
|
||
for (&res, 0..) |*item, i| {
|
||
item[0] = '$';
|
||
item[1] = @as(u8, @intCast(i)) + '0';
|
||
}
|
||
break :blk res;
|
||
};
|
||
return switch (self) {
|
||
.Pipe => "`|`",
|
||
.DoublePipe => "`||`",
|
||
.Ampersand => "`&`",
|
||
.DoubleAmpersand => "`&&`",
|
||
.Redirect => "`>`",
|
||
.Dollar => "`$`",
|
||
.Asterisk => "`*`",
|
||
.DoubleAsterisk => "`**`",
|
||
.Eq => "`=`",
|
||
.Semicolon => "`;`",
|
||
.Newline => "`\\n`",
|
||
// Comment,
|
||
.BraceBegin => "`{`",
|
||
.Comma => "`,`",
|
||
.BraceEnd => "`}`",
|
||
.CmdSubstBegin => "`$(`",
|
||
.CmdSubstQuoted => "CmdSubstQuoted",
|
||
.CmdSubstEnd => "`)`",
|
||
.OpenParen => "`(`",
|
||
.CloseParen => "`)`",
|
||
.Var => strpool[self.Var.start..self.Var.end],
|
||
.VarArgv => &varargv_strings[self.VarArgv],
|
||
.Text => strpool[self.Text.start..self.Text.end],
|
||
.SingleQuotedText => strpool[self.SingleQuotedText.start..self.SingleQuotedText.end],
|
||
.DoubleQuotedText => strpool[self.DoubleQuotedText.start..self.DoubleQuotedText.end],
|
||
.JSObjRef => "JSObjRef",
|
||
.DoubleBracketOpen => "[[",
|
||
.DoubleBracketClose => "]]",
|
||
.Delimit => "Delimit",
|
||
.Eof => "EOF",
|
||
};
|
||
}
|
||
};
|
||
|
||
pub const LexerAscii = NewLexer(.ascii);
|
||
pub const LexerUnicode = NewLexer(.wtf8);
|
||
pub const LexResult = struct {
|
||
errors: []LexError,
|
||
tokens: []const Token,
|
||
strpool: []const u8,
|
||
|
||
pub fn combineErrors(this: *const LexResult, arena: Allocator) []const u8 {
|
||
const errors = this.errors;
|
||
const str = str: {
|
||
const size = size: {
|
||
var i: usize = 0;
|
||
for (errors) |e| {
|
||
i += e.msg.len();
|
||
}
|
||
break :size i;
|
||
};
|
||
var buf = bun.handleOom(arena.alloc(u8, size));
|
||
var i: usize = 0;
|
||
for (errors) |e| {
|
||
@memcpy(buf[i .. i + e.msg.len()], e.msg.slice(this.strpool));
|
||
i += e.msg.len();
|
||
}
|
||
break :str buf;
|
||
};
|
||
return str;
|
||
}
|
||
};
|
||
pub const LexError = struct {
    /// Allocated with lexer arena
    msg: Token.TextRange,
};
|
||
|
||
/// A special char used to denote the beginning of a special token
|
||
/// used for substituting JS variables into the script string.
|
||
///
|
||
/// \b (decimal value of 8) is deliberately chosen so that it is not
|
||
/// easy for the user to accidentally use this char in their script.
|
||
///
|
||
const SPECIAL_JS_CHAR = 8;
|
||
pub const LEX_JS_OBJREF_PREFIX = &[_]u8{SPECIAL_JS_CHAR} ++ "__bun_";
|
||
pub const LEX_JS_STRING_PREFIX = &[_]u8{SPECIAL_JS_CHAR} ++ "__bunstr_";
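// Illustrative sketch (hypothetical marker values): when JS values are interpolated
// into a shell template literal, the script text handed to the lexer contains markers
// such as "\x08__bun_0" (object ref, index 0) or "\x08__bunstr_1" (string ref,
// index 1). The lexer spots the \x08 prefix defined above and substitutes the
// referenced JS value instead of lexing the marker as ordinary text.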
|
||
|
||
pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||
const Chars = ShellCharIter(encoding);
|
||
return struct {
|
||
chars: Chars,
|
||
|
||
/// Tells us the beginning of a "word"; indexes into the string pool (`strpool`).
|
||
/// Anytime a word is added, this needs to be updated
|
||
word_start: u32 = 0,
|
||
|
||
/// Keeps track of the end of a "word", indexes into the string pool (`strpool`),
|
||
/// anytime characters are added to the string pool this needs to be updated
|
||
j: u32 = 0,
|
||
|
||
strpool: ArrayList(u8),
|
||
tokens: ArrayList(Token),
|
||
delimit_quote: bool = false,
|
||
in_subshell: ?SubShellKind = null,
|
||
errors: std.array_list.Managed(LexError),
|
||
|
||
/// Contains a list of strings we need to escape
|
||
/// Not owned by this struct
|
||
string_refs: []bun.String,
|
||
|
||
const SubShellKind = enum {
    /// (echo hi; echo hello)
    normal,
    /// `echo hi; echo hello`
    backtick,
    /// $(echo hi; echo hello)
    dollar,
};
|
||
|
||
const LexerError = error{
|
||
OutOfMemory,
|
||
Utf8CannotEncodeSurrogateHalf,
|
||
Utf8InvalidStartByte,
|
||
CodepointTooLarge,
|
||
};
|
||
|
||
pub const js_objref_prefix = "$__bun_";
|
||
|
||
const State = Chars.State;
|
||
|
||
const InputChar = Chars.InputChar;
|
||
|
||
const BacktrackSnapshot = struct {
|
||
chars: Chars,
|
||
j: u32,
|
||
word_start: u32,
|
||
delimit_quote: bool,
|
||
};
|
||
|
||
pub fn new(alloc: Allocator, src: []const u8, strings_to_escape: []bun.String) @This() {
|
||
return .{
|
||
.chars = Chars.init(src),
|
||
.tokens = ArrayList(Token).init(alloc),
|
||
.strpool = ArrayList(u8).init(alloc),
|
||
.errors = ArrayList(LexError).init(alloc),
|
||
.string_refs = strings_to_escape,
|
||
};
|
||
}
|
||
|
||
pub fn get_result(self: @This()) LexResult {
|
||
return .{
|
||
.tokens = self.tokens.items,
|
||
.strpool = self.strpool.items,
|
||
.errors = self.errors.items,
|
||
};
|
||
}
|
||
|
||
pub fn add_error(self: *@This(), msg: []const u8) void {
|
||
const start = self.strpool.items.len;
|
||
bun.handleOom(self.strpool.appendSlice(msg));
|
||
const end = self.strpool.items.len;
|
||
bun.handleOom(self.errors.append(.{ .msg = .{ .start = @intCast(start), .end = @intCast(end) } }));
|
||
}
|
||
|
||
fn make_sublexer(self: *@This(), kind: SubShellKind) @This() {
|
||
log("[lex] make sublexer", .{});
|
||
var sublexer: @This() = .{
|
||
.chars = self.chars,
|
||
.strpool = self.strpool,
|
||
.tokens = self.tokens,
|
||
.errors = self.errors,
|
||
.in_subshell = kind,
|
||
|
||
.word_start = self.word_start,
|
||
.j = self.j,
|
||
.string_refs = self.string_refs,
|
||
};
|
||
sublexer.chars.state = .Normal;
|
||
return sublexer;
|
||
}
|
||
|
||
fn continue_from_sublexer(self: *@This(), sublexer: *@This()) void {
|
||
log("[lex] drop sublexer", .{});
|
||
self.strpool = sublexer.strpool;
|
||
self.tokens = sublexer.tokens;
|
||
self.errors = sublexer.errors;
|
||
|
||
self.chars = sublexer.chars;
|
||
self.word_start = sublexer.word_start;
|
||
self.j = sublexer.j;
|
||
self.delimit_quote = sublexer.delimit_quote;
|
||
}
|
||
|
||
fn make_snapshot(self: *@This()) BacktrackSnapshot {
|
||
return .{
|
||
.chars = self.chars,
|
||
.j = self.j,
|
||
.word_start = self.word_start,
|
||
.delimit_quote = self.delimit_quote,
|
||
};
|
||
}
|
||
|
||
fn backtrack(self: *@This(), snap: BacktrackSnapshot) void {
|
||
self.chars = snap.chars;
|
||
self.j = snap.j;
|
||
self.word_start = snap.word_start;
|
||
self.delimit_quote = snap.delimit_quote;
|
||
}
|
||
|
||
fn last_tok_tag(self: *@This()) ?TokenTag {
|
||
if (self.tokens.items.len == 0) return null;
|
||
return @as(TokenTag, self.tokens.items[self.tokens.items.len - 1]);
|
||
}
|
||
|
||
pub fn lex(self: *@This()) LexerError!void {
|
||
while (true) {
|
||
const input = self.eat() orelse {
|
||
try self.break_word(true);
|
||
break;
|
||
};
|
||
const char = input.char;
|
||
const escaped = input.escaped;
|
||
|
||
// Special token to denote substituted JS variables
|
||
// we use 8 or \b which is a non printable char
|
||
if (char == SPECIAL_JS_CHAR) {
|
||
if (self.looksLikeJSStringRef()) {
|
||
if (self.eatJSStringRef()) |bunstr| {
|
||
try self.break_word(false);
|
||
try self.handleJSStringRef(bunstr);
|
||
continue;
|
||
}
|
||
} else if (self.looksLikeJSObjRef()) {
|
||
if (self.eatJSObjRef()) |tok| {
|
||
if (self.chars.state == .Double) {
|
||
self.add_error("JS object reference not allowed in double quotes");
|
||
return;
|
||
}
|
||
try self.break_word(false);
|
||
try self.tokens.append(tok);
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
// Handle non-escaped chars:
|
||
// 1. special syntax (operators, etc.)
|
||
// 2. lexing state switchers (quotes)
|
||
// 3. word breakers (spaces, etc.)
|
||
else if (!escaped) escaped: {
|
||
switch (char) {
|
||
// possibly double bracket open
|
||
'[' => {
|
||
comptime assertSpecialChar('[');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
if (self.peek()) |p| {
|
||
if (p.escaped or p.char != '[') break :escaped;
|
||
const state = self.make_snapshot();
|
||
_ = self.eat();
|
||
do_backtrack: {
|
||
const p2 = self.peek() orelse {
|
||
try self.break_word(true);
|
||
try self.tokens.append(.DoubleBracketClose);
|
||
continue;
|
||
};
|
||
if (p2.escaped) break :do_backtrack;
|
||
switch (p2.char) {
|
||
' ', '\r', '\n', '\t' => {
|
||
try self.break_word(true);
|
||
try self.tokens.append(.DoubleBracketOpen);
|
||
},
|
||
else => break :do_backtrack,
|
||
}
|
||
continue;
|
||
}
|
||
self.backtrack(state);
|
||
}
|
||
break :escaped;
|
||
},
|
||
']' => {
|
||
comptime assertSpecialChar(']');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
if (self.peek()) |p| {
|
||
if (p.escaped or p.char != ']') break :escaped;
|
||
const state = self.make_snapshot();
|
||
_ = self.eat();
|
||
do_backtrack: {
|
||
const p2 = self.peek() orelse {
|
||
try self.break_word(true);
|
||
try self.tokens.append(.DoubleBracketClose);
|
||
continue;
|
||
};
|
||
if (p2.escaped) break :do_backtrack;
|
||
switch (p2.char) {
|
||
' ', '\r', '\n', '\t', ';', '&', '|', '>' => {
|
||
try self.break_word(true);
|
||
try self.tokens.append(.DoubleBracketClose);
|
||
},
|
||
else => break :do_backtrack,
|
||
}
|
||
continue;
|
||
}
|
||
self.backtrack(state);
|
||
}
|
||
break :escaped;
|
||
},
|
||
|
||
'#' => {
|
||
comptime assertSpecialChar('#');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
const whitespace_preceding =
|
||
if (self.chars.prev) |prev|
|
||
Chars.isWhitespace(prev)
|
||
else
|
||
true;
|
||
if (!whitespace_preceding) break :escaped;
|
||
try self.break_word(true);
|
||
self.eatComment();
|
||
continue;
|
||
},
|
||
';' => {
|
||
comptime assertSpecialChar(';');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word(true);
|
||
try self.tokens.append(.Semicolon);
|
||
continue;
|
||
},
|
||
'\n' => {
|
||
comptime assertSpecialChar('\n');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word_impl(true, true, false);
|
||
try self.tokens.append(.Newline);
|
||
continue;
|
||
},
|
||
|
||
// glob asterisks
|
||
'*' => {
|
||
comptime assertSpecialChar('*');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
if (self.peek()) |next| {
|
||
if (!next.escaped and next.char == '*') {
|
||
_ = self.eat();
|
||
try self.break_word(false);
|
||
try self.tokens.append(.DoubleAsterisk);
|
||
continue;
|
||
}
|
||
}
|
||
try self.break_word(false);
|
||
try self.tokens.append(.Asterisk);
|
||
continue;
|
||
},
|
||
|
||
// brace expansion syntax
|
||
'{' => {
|
||
comptime assertSpecialChar('{');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word(false);
|
||
try self.tokens.append(.BraceBegin);
|
||
continue;
|
||
},
|
||
',' => {
|
||
comptime assertSpecialChar(',');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word(false);
|
||
try self.tokens.append(.Comma);
|
||
continue;
|
||
},
|
||
'}' => {
|
||
comptime assertSpecialChar('}');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word(false);
|
||
try self.tokens.append(.BraceEnd);
|
||
continue;
|
||
},
|
||
|
||
// Command substitution
|
||
'`' => {
|
||
comptime assertSpecialChar('`');
|
||
|
||
if (self.chars.state == .Single) break :escaped;
|
||
if (self.in_subshell == .backtick) {
|
||
try self.break_word_operator();
|
||
if (self.last_tok_tag()) |toktag| {
|
||
if (toktag != .Delimit) try self.tokens.append(.Delimit);
|
||
}
|
||
try self.tokens.append(.CmdSubstEnd);
|
||
return;
|
||
} else {
|
||
try self.eat_subshell(.backtick);
|
||
}
|
||
},
|
||
// Command substitution/vars
|
||
'$' => {
|
||
comptime assertSpecialChar('$');
|
||
|
||
if (self.chars.state == .Single) break :escaped;
|
||
|
||
const peeked = self.peek() orelse InputChar{ .char = 0 };
|
||
if (!peeked.escaped and peeked.char == '(') {
|
||
try self.break_word(false);
|
||
try self.eat_subshell(.dollar);
|
||
continue;
|
||
}
|
||
|
||
// const snapshot = self.make_snapshot();
|
||
// Handle variable
|
||
try self.break_word(false);
|
||
const var_tok = try self.eat_var();
|
||
|
||
switch (var_tok.len()) {
|
||
0 => {
|
||
try self.appendCharToStrPool('$');
|
||
try self.break_word(false);
|
||
},
|
||
1 => blk: {
|
||
const c = self.strpool.items[var_tok.start];
|
||
if (c >= '0' and c <= '9') {
|
||
try self.tokens.append(.{ .VarArgv = c - '0' });
|
||
break :blk;
|
||
}
|
||
try self.tokens.append(.{ .Var = var_tok });
|
||
},
|
||
else => {
|
||
try self.tokens.append(.{ .Var = var_tok });
|
||
},
|
||
}
|
||
self.word_start = self.j;
|
||
continue;
|
||
},
|
||
'(' => {
|
||
comptime assertSpecialChar('(');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word(true);
|
||
try self.eat_subshell(.normal);
|
||
continue;
|
||
},
|
||
')' => {
|
||
comptime assertSpecialChar(')');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
if (self.in_subshell != .dollar and self.in_subshell != .normal) {
|
||
self.add_error("Unexpected ')'");
|
||
continue;
|
||
}
|
||
|
||
try self.break_word(true);
|
||
// Command substitution can be put in a word so need
|
||
// to add delimiter
|
||
if (self.in_subshell == .dollar) {
|
||
if (self.last_tok_tag()) |toktag| {
|
||
switch (toktag) {
|
||
.Delimit, .Semicolon, .Eof, .Newline => {},
|
||
else => {
|
||
try self.tokens.append(.Delimit);
|
||
},
|
||
}
|
||
}
|
||
}
|
||
|
||
if (self.in_subshell == .dollar) {
|
||
try self.tokens.append(.CmdSubstEnd);
|
||
} else if (self.in_subshell == .normal) {
|
||
try self.tokens.append(.CloseParen);
|
||
}
|
||
return;
|
||
},
|
||
|
||
'0'...'9' => {
|
||
comptime for ('0'..'9') |c| assertSpecialChar(c);
|
||
|
||
if (self.chars.state != .Normal) break :escaped;
|
||
const snapshot = self.make_snapshot();
|
||
if (self.eat_redirect(input)) |redirect| {
|
||
try self.break_word(true);
|
||
try self.tokens.append(.{ .Redirect = redirect });
|
||
continue;
|
||
}
|
||
self.backtrack(snapshot);
|
||
break :escaped;
|
||
},
|
||
|
||
// Operators
|
||
'|' => {
|
||
comptime assertSpecialChar('|');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word_operator();
|
||
|
||
const next = self.peek() orelse {
|
||
self.add_error("Unexpected EOF");
|
||
return;
|
||
};
|
||
if (!next.escaped and next.char == '&') {
|
||
self.add_error("Piping stdout and stderr (`|&`) is not supported yet. Please file an issue on GitHub.");
|
||
return;
|
||
}
|
||
if (next.escaped or next.char != '|') {
|
||
try self.tokens.append(.Pipe);
|
||
} else if (next.char == '|') {
|
||
_ = self.eat() orelse unreachable;
|
||
try self.tokens.append(.DoublePipe);
|
||
}
|
||
continue;
|
||
},
|
||
'>' => {
|
||
comptime assertSpecialChar('>');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word_operator();
|
||
const redirect = self.eat_simple_redirect(.out);
|
||
try self.tokens.append(.{ .Redirect = redirect });
|
||
continue;
|
||
},
|
||
'<' => {
|
||
comptime assertSpecialChar('<');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word_operator();
|
||
const redirect = self.eat_simple_redirect(.in);
|
||
try self.tokens.append(.{ .Redirect = redirect });
|
||
continue;
|
||
},
|
||
'&' => {
|
||
comptime assertSpecialChar('&');
|
||
|
||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||
try self.break_word_operator();
|
||
|
||
const next = self.peek() orelse {
|
||
try self.tokens.append(.Ampersand);
|
||
continue;
|
||
};
|
||
|
||
if (next.char == '>' and !next.escaped) {
|
||
_ = self.eat();
|
||
const inner = if (self.eat_simple_redirect_operator(.out))
|
||
AST.RedirectFlags.@"&>>"()
|
||
else
|
||
AST.RedirectFlags.@"&>"();
|
||
try self.tokens.append(.{ .Redirect = inner });
|
||
} else if (next.escaped or next.char != '&') {
|
||
try self.tokens.append(.Ampersand);
|
||
} else if (next.char == '&') {
|
||
_ = self.eat() orelse unreachable;
|
||
try self.tokens.append(.DoubleAmpersand);
|
||
} else {
|
||
try self.tokens.append(.Ampersand);
|
||
continue;
|
||
}
|
||
},
|
||
|
||
// 2. State switchers
|
||
'\'' => {
|
||
comptime assertSpecialChar('\'');
|
||
|
||
if (self.chars.state == .Single) {
|
||
self.chars.state = .Normal;
|
||
continue;
|
||
}
|
||
if (self.chars.state == .Normal) {
|
||
self.chars.state = .Single;
|
||
continue;
|
||
}
|
||
break :escaped;
|
||
},
|
||
'"' => {
|
||
comptime assertSpecialChar('"');
|
||
|
||
if (self.chars.state == .Single) break :escaped;
|
||
if (self.chars.state == .Normal) {
|
||
try self.break_word(false);
|
||
self.chars.state = .Double;
|
||
} else if (self.chars.state == .Double) {
|
||
try self.break_word(false);
|
||
// self.delimit_quote = true;
|
||
self.chars.state = .Normal;
|
||
}
|
||
continue;
|
||
},
|
||
|
||
// 3. Word breakers
|
||
' ' => {
|
||
comptime assertSpecialChar(' ');
|
||
|
||
if (self.chars.state == .Normal) {
|
||
try self.break_word_impl(true, true, false);
|
||
continue;
|
||
}
|
||
break :escaped;
|
||
},
|
||
|
||
else => break :escaped,
|
||
}
|
||
continue;
|
||
}
|
||
// Treat newline preceded by backslash as whitespace
|
||
else if (char == '\n') {
|
||
if (comptime bun.Environment.allow_assert) {
|
||
assert(input.escaped);
|
||
}
|
||
if (self.chars.state != .Double) {
|
||
try self.break_word_impl(true, true, false);
|
||
}
|
||
continue;
|
||
}
|
||
|
||
try self.appendCharToStrPool(char);
|
||
}
|
||
|
||
if (self.in_subshell) |subshell_kind| {
|
||
switch (subshell_kind) {
|
||
.dollar, .backtick => self.add_error("Unclosed command substitution"),
|
||
.normal => self.add_error("Unclosed subshell"),
|
||
}
|
||
return;
|
||
}
|
||
|
||
try self.tokens.append(.Eof);
|
||
}
|
||
|
||
fn appendCharToStrPool(self: *@This(), char: Chars.CodepointType) !void {
|
||
if (comptime encoding == .ascii) {
|
||
try self.strpool.append(char);
|
||
self.j += 1;
|
||
} else {
|
||
if (char <= 0x7F) {
|
||
try self.strpool.append(@intCast(char));
|
||
self.j += 1;
|
||
return;
|
||
} else {
|
||
try self.appendUnicodeCharToStrPool(char);
|
||
}
|
||
}
|
||
}
|
||
|
||
fn appendUnicodeCharToStrPool(self: *@This(), char: Chars.CodepointType) !void {
|
||
@branchHint(.cold);
|
||
|
||
const ichar: i32 = @intCast(char);
|
||
var bytes: [4]u8 = undefined;
|
||
const n = bun.strings.encodeWTF8Rune(&bytes, ichar);
|
||
self.j += n;
|
||
try self.strpool.appendSlice(bytes[0..n]);
|
||
}
|
||
|
||
fn break_word(self: *@This(), add_delimiter: bool) !void {
|
||
return try self.break_word_impl(add_delimiter, false, false);
|
||
}
|
||
|
||
/// NOTE: this adds a delimiter
|
||
fn break_word_operator(self: *@This()) !void {
|
||
return try self.break_word_impl(true, false, true);
|
||
}
|
||
|
||
inline fn isImmediatelyEscapedQuote(self: *@This()) bool {
|
||
return (self.chars.state == .Double and
|
||
(self.chars.current != null and !self.chars.current.?.escaped and self.chars.current.?.char == '"') and
|
||
(self.chars.prev != null and !self.chars.prev.?.escaped and self.chars.prev.?.char == '"'));
|
||
}
|
||
|
||
fn break_word_impl(self: *@This(), add_delimiter: bool, in_normal_space: bool, in_operator: bool) !void {
|
||
const start: u32 = self.word_start;
|
||
const end: u32 = self.j;
|
||
if (start != end or
|
||
self.isImmediatelyEscapedQuote() // we want to preserve immediately escaped quotes like: ""
|
||
) {
|
||
const tok: Token =
|
||
switch (self.chars.state) {
|
||
.Normal => @unionInit(Token, "Text", .{ .start = start, .end = end }),
|
||
.Single => @unionInit(Token, "SingleQuotedText", .{ .start = start, .end = end }),
|
||
.Double => @unionInit(Token, "DoubleQuotedText", .{ .start = start, .end = end }),
|
||
};
|
||
try self.tokens.append(tok);
|
||
if (add_delimiter) {
|
||
try self.tokens.append(.Delimit);
|
||
}
|
||
} else if ((in_normal_space or in_operator) and self.tokens.items.len > 0 and
|
||
// whether or not to add a delimiter token
|
||
switch (self.tokens.items[self.tokens.items.len - 1]) {
|
||
.Var,
|
||
.VarArgv,
|
||
.Text,
|
||
.SingleQuotedText,
|
||
.DoubleQuotedText,
|
||
.BraceBegin,
|
||
.Comma,
|
||
.BraceEnd,
|
||
.CmdSubstEnd,
|
||
.Asterisk,
|
||
=> true,
|
||
|
||
.Pipe,
|
||
.DoublePipe,
|
||
.Ampersand,
|
||
.DoubleAmpersand,
|
||
.Redirect,
|
||
.Dollar,
|
||
.DoubleAsterisk,
|
||
.Eq,
|
||
.Semicolon,
|
||
.Newline,
|
||
.CmdSubstBegin,
|
||
.CmdSubstQuoted,
|
||
.OpenParen,
|
||
.CloseParen,
|
||
.JSObjRef,
|
||
.DoubleBracketOpen,
|
||
.DoubleBracketClose,
|
||
.Delimit,
|
||
.Eof,
|
||
=> false,
|
||
})
|
||
{
|
||
try self.tokens.append(.Delimit);
|
||
self.delimit_quote = false;
|
||
}
|
||
self.word_start = self.j;
|
||
}
|
||
|
||
const RedirectDirection = enum { out, in };
|
||
|
||
fn eat_simple_redirect(self: *@This(), dir: RedirectDirection) AST.RedirectFlags {
|
||
const is_double = self.eat_simple_redirect_operator(dir);
|
||
|
||
if (is_double) {
|
||
return switch (dir) {
|
||
.out => AST.RedirectFlags.@">>"(),
|
||
.in => AST.RedirectFlags.@"<<"(),
|
||
};
|
||
}
|
||
|
||
return switch (dir) {
|
||
.out => AST.RedirectFlags.@">"(),
|
||
.in => AST.RedirectFlags.@"<"(),
|
||
};
|
||
}
|
||
|
||
/// Returns true if the operator is doubled: `>>` or `<<`.
/// Returns false otherwise (e.g. a single `>`/`<`, or a mismatched pair like `<>` or `><`).
|
||
fn eat_simple_redirect_operator(self: *@This(), dir: RedirectDirection) bool {
|
||
if (self.peek()) |peeked| {
|
||
if (peeked.escaped) return false;
|
||
switch (peeked.char) {
|
||
'>' => {
|
||
if (dir == .out) {
|
||
_ = self.eat();
|
||
return true;
|
||
}
|
||
return false;
|
||
},
|
||
'<' => {
|
||
if (dir == .in) {
|
||
_ = self.eat();
|
||
return true;
|
||
}
|
||
return false;
|
||
},
|
||
else => return false,
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// TODO Arbitrary file descriptor redirect
|
||
fn eat_redirect(self: *@This(), first: InputChar) ?AST.RedirectFlags {
|
||
var flags: AST.RedirectFlags = .{};
|
||
switch (first.char) {
|
||
'0' => flags.stdin = true,
|
||
'1' => flags.stdout = true,
|
||
'2' => flags.stderr = true,
|
||
// Just allow the std file descriptors for now
|
||
else => return null,
|
||
}
|
||
var dir: RedirectDirection = .out;
|
||
if (self.peek()) |input| {
|
||
if (input.escaped) return null;
|
||
switch (input.char) {
|
||
'>' => {
|
||
_ = self.eat();
|
||
dir = .out;
|
||
const is_double = self.eat_simple_redirect_operator(dir);
|
||
if (is_double) flags.append = true;
|
||
if (self.peek()) |peeked| {
|
||
if (!peeked.escaped and peeked.char == '&') {
|
||
_ = self.eat();
|
||
if (self.peek()) |peeked2| {
|
||
switch (peeked2.char) {
|
||
'1' => {
|
||
_ = self.eat();
|
||
if (!flags.stdout and flags.stderr) {
|
||
flags.duplicate_out = true;
|
||
flags.stdout = true;
|
||
flags.stderr = false;
|
||
} else return null;
|
||
},
|
||
'2' => {
|
||
_ = self.eat();
|
||
if (!flags.stderr and flags.stdout) {
|
||
flags.duplicate_out = true;
|
||
flags.stderr = true;
|
||
flags.stdout = false;
|
||
} else return null;
|
||
},
|
||
else => return null,
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return flags;
|
||
},
|
||
'<' => {
|
||
dir = .in;
|
||
const is_double = self.eat_simple_redirect_operator(dir);
|
||
if (is_double) flags.append = true;
|
||
return flags;
|
||
},
|
||
else => return null,
|
||
}
|
||
} else return null;
|
||
}
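// Worked example (illustrative): for `2>&1` the leading `2` sets `flags.stderr`, the
// `>` selects the out direction, and the trailing `&1` flips the flags so that
// `duplicate_out` and `stdout` are set while `stderr` is cleared. For `1>>file`, the
// `1` sets `stdout`, the doubled `>>` sets `append`, and the file atom itself is left
// for parse_redirect to consume.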
|
||
|
||
fn eat_redirect_old(self: *@This(), first: InputChar) ?AST.RedirectFlags {
|
||
var flags: AST.RedirectFlags = .{};
|
||
if (self.matchesAsciiLiteral("2>&1")) {} else if (self.matchesAsciiLiteral("1>&2")) {} else switch (first.char) {
|
||
'0'...'9' => {
|
||
// Codepoint int casts are safe here because the digits are in the ASCII range
|
||
var count: usize = 1;
|
||
var buf: [32]u8 = [_]u8{@intCast(first.char)} ** 32;
|
||
|
||
while (self.peek()) |peeked| {
|
||
const char = peeked.char;
|
||
switch (char) {
|
||
'0'...'9' => {
|
||
_ = self.eat();
|
||
if (count >= 32) {
|
||
return null;
|
||
}
|
||
buf[count] = @intCast(char);
|
||
count += 1;
|
||
continue;
|
||
},
|
||
else => break,
|
||
}
|
||
}
|
||
|
||
const num = std.fmt.parseInt(usize, buf[0..count], 10) catch {
|
||
// This means the number was really large, meaning it
|
||
// probably was supposed to be a string
|
||
return null;
|
||
};
|
||
|
||
switch (num) {
|
||
0 => {
|
||
flags.stdin = true;
|
||
},
|
||
1 => {
|
||
flags.stdout = true;
|
||
},
|
||
2 => {
|
||
flags.stderr = true;
|
||
},
|
||
else => {
|
||
// FIXME support redirection to any arbitrary fd
|
||
log("redirection to fd {d} is invalid\n", .{num});
|
||
return null;
|
||
},
|
||
}
|
||
},
|
||
'&' => {
|
||
if (first.escaped) return null;
|
||
flags.stdout = true;
|
||
flags.stderr = true;
|
||
_ = self.eat();
|
||
},
|
||
else => return null,
|
||
}
|
||
|
||
var dir: RedirectDirection = .out;
|
||
if (self.peek()) |input| {
|
||
if (input.escaped) return null;
|
||
switch (input.char) {
|
||
'>' => dir = .out,
|
||
'<' => dir = .in,
|
||
else => return null,
|
||
}
|
||
_ = self.eat();
|
||
} else return null;
|
||
|
||
const is_double = self.eat_simple_redirect_operator(dir);
|
||
if (is_double) {
|
||
flags.append = true;
|
||
}
|
||
|
||
return flags;
|
||
}
|
||
|
||
/// Assumes the first character of the literal has been eaten
|
||
/// Backtracks and returns false if unsuccessful
|
||
fn eat_literal(self: *@This(), comptime CodepointType: type, comptime literal: []const CodepointType) bool {
|
||
const literal_skip_first = literal[1..];
|
||
const snapshot = self.make_snapshot();
|
||
const slice = self.eat_slice(CodepointType, literal_skip_first.len) orelse {
|
||
self.backtrack(snapshot);
|
||
return false;
|
||
};
|
||
|
||
if (std.mem.eql(CodepointType, &slice, literal_skip_first))
|
||
return true;
|
||
|
||
self.backtrack(snapshot);
|
||
return false;
|
||
}
|
||
|
||
fn eat_number_word(self: *@This()) ?usize {
|
||
const snap = self.make_snapshot();
|
||
var count: usize = 0;
|
||
var buf: [32]u8 = [_]u8{0} ** 32;
|
||
|
||
while (self.eat()) |result| {
|
||
const char = result.char;
|
||
switch (char) {
|
||
'0'...'9' => {
|
||
if (count >= 32) return null;
|
||
// Safe to cast here because 0-9 is in the ASCII range
|
||
buf[count] = @intCast(char);
|
||
count += 1;
|
||
continue;
|
||
},
|
||
else => {
|
||
break;
|
||
},
|
||
}
|
||
}
|
||
|
||
if (count == 0) {
|
||
self.backtrack(snap);
|
||
return null;
|
||
}
|
||
|
||
const num = std.fmt.parseInt(usize, buf[0..count], 10) catch {
|
||
self.backtrack(snap);
|
||
return null;
|
||
};
|
||
|
||
return num;
|
||
}
|
||
|
||
fn eat_subshell(self: *@This(), kind: SubShellKind) !void {
|
||
if (kind == .dollar) {
|
||
// Eat the open paren
|
||
_ = self.eat();
|
||
}
|
||
|
||
switch (kind) {
|
||
.dollar, .backtick => {
|
||
try self.tokens.append(.CmdSubstBegin);
|
||
if (self.chars.state == .Double) {
|
||
try self.tokens.append(.CmdSubstQuoted);
|
||
}
|
||
},
|
||
.normal => try self.tokens.append(.OpenParen),
|
||
}
|
||
const prev_quote_state = self.chars.state;
|
||
var sublexer = self.make_sublexer(kind);
|
||
try sublexer.lex();
|
||
self.continue_from_sublexer(&sublexer);
|
||
self.chars.state = prev_quote_state;
|
||
}
|
||
|
||
fn appendStringToStrPool(self: *@This(), bunstr: bun.String) !void {
|
||
const start = self.strpool.items.len;
|
||
if (bunstr.isUTF16()) {
|
||
const utf16 = bunstr.utf16();
|
||
const additional = bun.simdutf.simdutf__utf8_length_from_utf16le(utf16.ptr, utf16.len);
|
||
try self.strpool.ensureUnusedCapacity(additional);
|
||
try bun.strings.convertUTF16ToUTF8Append(&self.strpool, bunstr.utf16());
|
||
} else if (bunstr.isUTF8()) {
|
||
try self.strpool.appendSlice(bunstr.byteSlice());
|
||
} else if (bunstr.is8Bit()) {
|
||
if (isAllAscii(bunstr.byteSlice())) {
|
||
try self.strpool.appendSlice(bunstr.byteSlice());
|
||
} else {
|
||
const bytes = bunstr.byteSlice();
|
||
const non_ascii_idx = bun.strings.firstNonASCII(bytes) orelse 0;
|
||
|
||
if (non_ascii_idx > 0) {
|
||
try self.strpool.appendSlice(bytes[0..non_ascii_idx]);
|
||
}
|
||
self.strpool = try bun.strings.allocateLatin1IntoUTF8WithList(self.strpool, self.strpool.items.len, bytes[non_ascii_idx..]);
|
||
}
|
||
}
|
||
const end = self.strpool.items.len;
|
||
self.j += @intCast(end - start);
|
||
}
|
||
|
||
fn handleJSStringRef(self: *@This(), bunstr: bun.String) !void {
|
||
try self.appendStringToStrPool(bunstr);
|
||
}
|
||
|
||
fn looksLikeJSObjRef(self: *@This()) bool {
|
||
const bytes = self.chars.srcBytesAtCursor();
|
||
if (LEX_JS_OBJREF_PREFIX.len - 1 >= bytes.len) return false;
|
||
return std.mem.eql(u8, bytes[0 .. LEX_JS_OBJREF_PREFIX.len - 1], LEX_JS_OBJREF_PREFIX[1..]);
|
||
}
|
||
|
||
fn looksLikeJSStringRef(self: *@This()) bool {
|
||
const bytes = self.chars.srcBytesAtCursor();
|
||
if (LEX_JS_STRING_PREFIX.len - 1 >= bytes.len) return false;
|
||
return std.mem.eql(u8, bytes[0 .. LEX_JS_STRING_PREFIX.len - 1], LEX_JS_STRING_PREFIX[1..]);
|
||
}
|
||
|
||
fn bumpCursorAscii(self: *@This(), new_idx: usize, prev_ascii_char: ?u7, cur_ascii_char: u7) void {
|
||
if (comptime encoding == .ascii) {
|
||
self.chars.src.i = new_idx;
|
||
if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
|
||
self.chars.current = .{ .char = cur_ascii_char };
|
||
return;
|
||
}
|
||
self.chars.src.cursor = CodepointIterator.Cursor{
|
||
.i = @intCast(new_idx),
|
||
.c = cur_ascii_char,
|
||
.width = 1,
|
||
};
|
||
self.chars.src.next_cursor = self.chars.src.cursor;
|
||
SrcUnicode.nextCursor(&self.chars.src.iter, &self.chars.src.next_cursor);
|
||
if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
|
||
self.chars.current = .{ .char = cur_ascii_char };
|
||
}
|
||
|
||
fn matchesAsciiLiteral(self: *@This(), literal: []const u8) bool {
|
||
const bytes = self.chars.srcBytesAtCursor();
|
||
if (literal.len >= bytes.len) return false;
|
||
return std.mem.eql(u8, bytes[0..literal.len], literal[0..]);
|
||
}
|
||
|
||
fn eatJSSubstitutionIdx(self: *@This(), comptime literal: []const u8, comptime name: []const u8, comptime validate: *const fn (*@This(), usize) bool) ?usize {
|
||
if (self.matchesAsciiLiteral(literal[1..literal.len])) {
|
||
const bytes = self.chars.srcBytesAtCursor();
|
||
var i: usize = 0;
|
||
var digit_buf: [32]u8 = undefined;
|
||
var digit_buf_count: u8 = 0;
|
||
|
||
i += literal.len - 1;
|
||
|
||
while (i < bytes.len) : (i += 1) {
|
||
switch (bytes[i]) {
|
||
'0'...'9' => {
|
||
if (digit_buf_count >= digit_buf.len) {
|
||
const ERROR_STR = "Invalid " ++ name ++ " (number too high): ";
|
||
var error_buf: [ERROR_STR.len + digit_buf.len + 1]u8 = undefined;
|
||
const error_msg = std.fmt.bufPrint(error_buf[0..], "{s} {s}{c}", .{ ERROR_STR, digit_buf[0..digit_buf_count], bytes[i] }) catch @panic("Should not happen");
|
||
self.add_error(error_msg);
|
||
return null;
|
||
}
|
||
digit_buf[digit_buf_count] = bytes[i];
|
||
digit_buf_count += 1;
|
||
},
|
||
else => break,
|
||
}
|
||
}
|
||
|
||
if (digit_buf_count == 0) {
|
||
self.add_error("Invalid " ++ name ++ " (no idx)");
|
||
return null;
|
||
}
|
||
|
||
const idx = std.fmt.parseInt(usize, digit_buf[0..digit_buf_count], 10) catch {
|
||
self.add_error("Invalid " ++ name ++ " ref ");
|
||
return null;
|
||
};
|
||
|
||
if (!validate(self, idx)) return null;
|
||
// if (idx >= self.string_refs.len) {
|
||
// self.add_error("Invalid " ++ name ++ " (out of bounds");
|
||
// return null;
|
||
// }
|
||
|
||
// Bump the cursor
|
||
const new_idx = self.chars.cursorPos() + i;
|
||
const prev_ascii_char: ?u7 = if (digit_buf_count == 1) null else @truncate(digit_buf[digit_buf_count - 2]);
|
||
const cur_ascii_char: u7 = @truncate(digit_buf[digit_buf_count - 1]);
|
||
self.bumpCursorAscii(new_idx, prev_ascii_char, cur_ascii_char);
|
||
|
||
// return self.string_refs[idx];
|
||
return idx;
|
||
}
|
||
return null;
|
||
}
|
||
|
||
/// __NOTE__: Do not store references to the returned bun.String, it does not have its ref count incremented
|
||
fn eatJSStringRef(self: *@This()) ?bun.String {
|
||
if (self.eatJSSubstitutionIdx(
|
||
LEX_JS_STRING_PREFIX,
|
||
"JS string ref",
|
||
validateJSStringRefIdx,
|
||
)) |idx| {
|
||
return self.string_refs[idx];
|
||
}
|
||
return null;
|
||
}
|
||
|
||
fn validateJSStringRefIdx(self: *@This(), idx: usize) bool {
|
||
if (idx >= self.string_refs.len) {
|
||
self.add_error("Invalid JS string ref (out of bounds");
|
||
return false;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
fn eatJSObjRef(self: *@This()) ?Token {
|
||
if (self.eatJSSubstitutionIdx(
|
||
LEX_JS_OBJREF_PREFIX,
|
||
"JS object ref",
|
||
validateJSObjRefIdx,
|
||
)) |idx| {
|
||
return .{ .JSObjRef = @intCast(idx) };
|
||
}
|
||
return null;
|
||
}
|
||
|
||
fn validateJSObjRefIdx(self: *@This(), idx: usize) bool {
|
||
if (idx >= std.math.maxInt(u32)) {
|
||
self.add_error("Invalid JS object ref (out of bounds)");
|
||
return false;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
fn eat_var(self: *@This()) !Token.TextRange {
|
||
const start = self.j;
|
||
var i: usize = 0;
|
||
var is_int = false;
|
||
// Eat until special character
|
||
while (self.peek()) |result| {
|
||
defer i += 1;
|
||
const char = result.char;
|
||
const escaped = result.escaped;
|
||
|
||
if (i == 0) {
|
||
switch (char) {
|
||
'=' => return .{ .start = start, .end = self.j },
|
||
'0'...'9' => {
|
||
is_int = true;
|
||
_ = self.eat().?;
|
||
try self.appendCharToStrPool(char);
|
||
continue;
|
||
},
|
||
'a'...'z', 'A'...'Z', '_' => {},
|
||
else => return .{ .start = start, .end = self.j },
|
||
}
|
||
}
|
||
if (is_int) {
|
||
return .{ .start = start, .end = self.j };
|
||
}
|
||
|
||
// if (char
|
||
switch (char) {
|
||
'{', '}', ';', '\'', '\"', ' ', '|', '&', '>', ',', '$' => {
|
||
return .{ .start = start, .end = self.j };
|
||
},
|
||
else => {
|
||
if (!escaped and
|
||
(self.in_subshell == .dollar and char == ')') or (self.in_subshell == .backtick and char == '`') or (self.in_subshell == .normal and char == ')'))
|
||
{
|
||
return .{ .start = start, .end = self.j };
|
||
}
|
||
switch (char) {
|
||
'0'...'9', 'a'...'z', 'A'...'Z', '_' => {
|
||
_ = self.eat() orelse unreachable;
|
||
try self.appendCharToStrPool(char);
|
||
},
|
||
else => return .{ .start = start, .end = self.j },
|
||
}
|
||
},
|
||
}
|
||
}
|
||
return .{ .start = start, .end = self.j };
|
||
}
|
||
|
||
fn eat(self: *@This()) ?InputChar {
|
||
return self.chars.eat();
|
||
}
|
||
|
||
fn eatComment(self: *@This()) void {
|
||
while (self.eat()) |peeked| {
|
||
if (peeked.escaped) {
|
||
continue;
|
||
}
|
||
if (peeked.char == '\n') break;
|
||
}
|
||
}
|
||
|
||
fn eat_slice(self: *@This(), comptime CodepointType: type, comptime N: usize) ?[N]CodepointType {
|
||
var slice = [_]CodepointType{0} ** N;
|
||
var i: usize = 0;
|
||
while (self.peek()) |result| {
|
||
// If the requested codepoint type covers at least the source's codepoint
// range, the int cast below cannot truncate, so it will not panic.
|
||
if (CodepointType == Chars.CodepointType or std.math.maxInt(CodepointType) >= std.math.maxInt(Chars.CodepointType)) {
|
||
slice[i] = @intCast(result.char);
|
||
} else {
|
||
// Otherwise the codepoint range is smaller than the source, so we need to check that the chars are valid
|
||
if (result.char > std.math.maxInt(CodepointType)) {
|
||
return null;
|
||
}
|
||
slice[i] = @intCast(result.char);
|
||
}
|
||
|
||
i += 1;
|
||
_ = self.eat();
|
||
if (i == N) {
|
||
return slice;
|
||
}
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
fn peek(self: *@This()) ?InputChar {
|
||
return self.chars.peek();
|
||
}
|
||
|
||
fn read_char(self: *@This()) ?InputChar {
|
||
return self.chars.read_char();
|
||
}
|
||
};
|
||
}
|
||
|
||
pub const StringEncoding = enum { ascii, wtf8, utf16 };
|
||
|
||
const SrcAscii = struct {
|
||
bytes: []const u8,
|
||
i: usize,
|
||
|
||
const IndexValue = packed struct(u8) {
|
||
char: u7,
|
||
escaped: bool = false,
|
||
};
|
||
|
||
fn init(bytes: []const u8) SrcAscii {
|
||
return .{
|
||
.bytes = bytes,
|
||
.i = 0,
|
||
};
|
||
}
|
||
|
||
inline fn index(this: *const SrcAscii) ?IndexValue {
|
||
if (this.i >= this.bytes.len) return null;
|
||
return .{ .char = @intCast(this.bytes[this.i]) };
|
||
}
|
||
|
||
inline fn indexNext(this: *const SrcAscii) ?IndexValue {
|
||
if (this.i + 1 >= this.bytes.len) return null;
|
||
return .{ .char = @intCast(this.bytes[this.i + 1]) };
|
||
}
|
||
|
||
inline fn eat(this: *SrcAscii, escaped: bool) void {
|
||
this.i += 1 + @as(u32, @intFromBool(escaped));
|
||
}
|
||
};
|
||
|
||
const SrcUnicode = struct {
|
||
iter: CodepointIterator,
|
||
cursor: CodepointIterator.Cursor,
|
||
next_cursor: CodepointIterator.Cursor,
|
||
|
||
const IndexValue = struct {
|
||
char: u32,
|
||
width: u8,
|
||
};
|
||
|
||
fn nextCursor(iter: *const CodepointIterator, cursor: *CodepointIterator.Cursor) void {
|
||
if (!iter.next(cursor)) {
|
||
// This will make `i > sourceBytes.len` so the condition in `index` will fail
|
||
cursor.i = @intCast(iter.bytes.len + 1);
|
||
cursor.width = 1;
|
||
cursor.c = CodepointIterator.ZeroValue;
|
||
}
|
||
}
|
||
|
||
fn init(bytes: []const u8) SrcUnicode {
|
||
var iter = CodepointIterator.init(bytes);
|
||
var cursor = CodepointIterator.Cursor{};
|
||
nextCursor(&iter, &cursor);
|
||
var next_cursor: CodepointIterator.Cursor = cursor;
|
||
nextCursor(&iter, &next_cursor);
|
||
return .{ .iter = iter, .cursor = cursor, .next_cursor = next_cursor };
|
||
}
|
||
|
||
inline fn index(this: *const SrcUnicode) ?IndexValue {
|
||
if (this.cursor.width + this.cursor.i > this.iter.bytes.len) return null;
|
||
return .{ .char = this.cursor.c, .width = this.cursor.width };
|
||
}
|
||
|
||
inline fn indexNext(this: *const SrcUnicode) ?IndexValue {
|
||
if (this.next_cursor.width + this.next_cursor.i > this.iter.bytes.len) return null;
|
||
return .{ .char = @intCast(this.next_cursor.c), .width = this.next_cursor.width };
|
||
}
|
||
|
||
inline fn eat(this: *SrcUnicode, escaped: bool) void {
|
||
// eat two codepoints
|
||
if (escaped) {
|
||
nextCursor(&this.iter, &this.next_cursor);
|
||
this.cursor = this.next_cursor;
|
||
nextCursor(&this.iter, &this.next_cursor);
|
||
} else {
|
||
// eat one codepoint
|
||
this.cursor = this.next_cursor;
|
||
nextCursor(&this.iter, &this.next_cursor);
|
||
}
|
||
}
|
||
};
|
||
|
||
pub fn ShellCharIter(comptime encoding: StringEncoding) type {
|
||
return struct {
|
||
src: Src,
|
||
state: State = .Normal,
|
||
prev: ?InputChar = null,
|
||
current: ?InputChar = null,
|
||
|
||
pub const Src = switch (encoding) {
|
||
.ascii => SrcAscii,
|
||
.wtf8, .utf16 => SrcUnicode,
|
||
};
|
||
|
||
pub const CodepointType = if (encoding == .ascii) u7 else u32;
|
||
|
||
pub const InputChar = if (encoding == .ascii) SrcAscii.IndexValue else struct {
|
||
char: u32,
|
||
escaped: bool = false,
|
||
};
|
||
|
||
pub fn isWhitespace(char: InputChar) bool {
|
||
return switch (char.char) {
|
||
'\t', '\r', '\n', ' ' => true,
|
||
else => false,
|
||
};
|
||
}
|
||
|
||
pub const State = enum {
|
||
Normal,
|
||
Single,
|
||
Double,
|
||
};
|
||
|
||
pub fn init(bytes: []const u8) @This() {
|
||
const src = if (comptime encoding == .ascii)
|
||
SrcAscii.init(bytes)
|
||
else
|
||
SrcUnicode.init(bytes);
|
||
|
||
return .{
|
||
.src = src,
|
||
};
|
||
}
|
||
|
||
pub fn srcBytes(self: *@This()) []const u8 {
|
||
if (comptime encoding == .ascii) return self.src.bytes;
|
||
return self.src.iter.bytes;
|
||
}
|
||
|
||
pub fn srcBytesAtCursor(self: *@This()) []const u8 {
|
||
const bytes = self.srcBytes();
|
||
if (comptime encoding == .ascii) {
|
||
if (self.src.i >= bytes.len) return "";
|
||
return bytes[self.src.i..];
|
||
}
|
||
|
||
if (self.src.iter.i >= bytes.len) return "";
|
||
return bytes[self.src.iter.i..];
|
||
}
|
||
|
||
pub fn cursorPos(self: *@This()) usize {
|
||
if (comptime encoding == .ascii) return self.src.i;
|
||
return self.src.iter.i;
|
||
}
|
||
|
||
pub fn eat(self: *@This()) ?InputChar {
|
||
if (self.read_char()) |result| {
|
||
self.prev = self.current;
|
||
self.current = result;
|
||
self.src.eat(result.escaped);
|
||
return result;
|
||
}
|
||
return null;
|
||
}
|
||
|
||
pub fn peek(self: *@This()) ?InputChar {
|
||
if (self.read_char()) |result| {
|
||
return result;
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
pub fn read_char(self: *@This()) ?InputChar {
|
||
const indexed_value = self.src.index() orelse return null;
|
||
var char = indexed_value.char;
|
||
if (char != '\\' or self.state == .Single) return .{ .char = char };
|
||
|
||
// Handle backslash
|
||
switch (self.state) {
|
||
.Normal => {
|
||
const peeked = self.src.indexNext() orelse return null;
|
||
char = peeked.char;
|
||
},
|
||
.Double => {
|
||
const peeked = self.src.indexNext() orelse return null;
|
||
switch (peeked.char) {
|
||
// Backslash only applies to these characters
|
||
'$', '`', '"', '\\', '\n', '#' => {
|
||
char = peeked.char;
|
||
},
|
||
else => return .{ .char = char, .escaped = false },
|
||
}
|
||
},
|
||
// We checked `self.state == .Single` above so this is impossible
|
||
.Single => unreachable,
|
||
}
|
||
|
||
return .{ .char = char, .escaped = true };
|
||
}
|
||
};
|
||
}
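// Escape-handling example (illustrative): in the .Normal state, `\$HOME` yields `$`
// with `.escaped = true`, so the lexer appends it to the string pool instead of
// starting a variable. In the .Double state the backslash is only honored before
// $, `, ", \, newline, and #; anything else (e.g. `\n`) comes back as a literal
// backslash followed by the next character. In the .Single state, read_char never
// treats `\` specially.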
|
||
|
||
/// Only these characters are allowed:
/// - a-zA-Z
/// - _
/// - 0-9 (but can't be the first char)
|
||
pub fn isValidVarName(var_name: []const u8) bool {
|
||
if (isAllAscii(var_name)) return isValidVarNameAscii(var_name);
|
||
|
||
if (var_name.len == 0) return false;
|
||
var iter = CodepointIterator.init(var_name);
|
||
var cursor = CodepointIterator.Cursor{};
|
||
|
||
if (!iter.next(&cursor)) return false;
|
||
|
||
switch (cursor.c) {
|
||
'=', '0'...'9' => {
|
||
return false;
|
||
},
|
||
'a'...'z', 'A'...'Z', '_' => {},
|
||
else => return false,
|
||
}
|
||
|
||
while (iter.next(&cursor)) {
|
||
switch (cursor.c) {
|
||
'0'...'9', 'a'...'z', 'A'...'Z', '_' => {},
|
||
else => return false,
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
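// A minimal test sketch (assuming `std` is imported at the top of this file, as it is
// used throughout); the cases below just restate the rules documented above and are
// illustrative rather than exhaustive.
test "isValidVarName accepts and rejects the expected shapes (sketch)" {
    try std.testing.expect(isValidVarName("FOO"));
    try std.testing.expect(isValidVarName("foo_bar1"));
    try std.testing.expect(!isValidVarName("1FOO")); // digits cannot come first
    try std.testing.expect(!isValidVarName("FOO-BAR")); // '-' is not allowed
    try std.testing.expect(!isValidVarName("")); // empty names are rejected
}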
fn isValidVarNameAscii(var_name: []const u8) bool {
|
||
if (var_name.len == 0) return false;
|
||
switch (var_name[0]) {
|
||
'=', '0'...'9' => {
|
||
return false;
|
||
},
|
||
'a'...'z', 'A'...'Z', '_' => {
|
||
if (var_name.len == 1) return true;
|
||
},
|
||
else => return false,
|
||
}
|
||
for (var_name) |c| {
|
||
switch (c) {
|
||
'0'...'9', 'a'...'z', 'A'...'Z', '_' => {},
|
||
else => return false,
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
|
||
var stderr_mutex = bun.Mutex{};
|
||
|
||
pub fn hasEqSign(str: []const u8) ?u32 {
    if (isAllAscii(str)) {
        return bun.strings.indexOfChar(str, '=');
    }

    // TODO actually i think that this can also use the simd stuff
    var iter = CodepointIterator.init(str);
    var cursor = CodepointIterator.Cursor{};
    while (iter.next(&cursor)) {
        if (cursor.c == '=') {
            return @intCast(cursor.i);
        }
    }

    return null;
}

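/// Iterator over a command's environment variable map, yielding formatted key/value entries.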
pub const CmdEnvIter = struct {
    env: *const bun.StringArrayHashMap([:0]const u8),
    iter: bun.StringArrayHashMap([:0]const u8).Iterator,

    const Entry = struct {
        key: Key,
        value: Value,
    };

    const Value = struct {
        val: [:0]const u8,

        pub fn format(self: Value, writer: *std.Io.Writer) !void {
            try writer.writeAll(self.val);
        }
    };

    const Key = struct {
        val: []const u8,

        pub fn format(self: Key, writer: *std.Io.Writer) !void {
            try writer.writeAll(self.val);
        }

        pub fn eqlComptime(this: Key, comptime str: []const u8) bool {
            return bun.strings.eqlComptime(this.val, str);
        }
    };

    pub fn fromEnv(env: *const bun.StringArrayHashMap([:0]const u8)) CmdEnvIter {
        const iter = env.iterator();
        return .{
            .env = env,
            .iter = iter,
        };
    }

    pub fn len(self: *const CmdEnvIter) usize {
        return self.env.unmanaged.entries.len;
    }

    pub fn next(self: *CmdEnvIter) !?Entry {
        const entry = self.iter.next() orelse return null;
        return .{
            .key = .{ .val = entry.key_ptr.* },
            .value = .{ .val = entry.value_ptr.* },
        };
    }
};

pub const Test = struct {
    pub const TestToken = union(TokenTag) {
        // |
        Pipe,
        // ||
        DoublePipe,
        // &
        Ampersand,
        // &&
        DoubleAmpersand,

        // >
        Redirect: AST.RedirectFlags,

        // $
        Dollar,
        // *
        Asterisk,
        DoubleAsterisk,
        // =
        Eq,
        Semicolon,
        Newline,

        BraceBegin,
        Comma,
        BraceEnd,
        CmdSubstBegin,
        CmdSubstQuoted,
        CmdSubstEnd,
        OpenParen,
        CloseParen,

        Var: []const u8,
        VarArgv: u8,
        Text: []const u8,
        SingleQuotedText: []const u8,
        DoubleQuotedText: []const u8,
        JSObjRef: u32,

        DoubleBracketOpen,
        DoubleBracketClose,

        Delimit,
        Eof,

        pub fn from_real(the_token: Token, buf: []const u8) TestToken {
            switch (the_token) {
                .Var => |txt| return .{ .Var = buf[txt.start..txt.end] },
                .VarArgv => |int| return .{ .VarArgv = int },
                .Text => |txt| return .{ .Text = buf[txt.start..txt.end] },
                .SingleQuotedText => |txt| return .{ .SingleQuotedText = buf[txt.start..txt.end] },
                .DoubleQuotedText => |txt| return .{ .DoubleQuotedText = buf[txt.start..txt.end] },
                .JSObjRef => |val| return .{ .JSObjRef = val },
                .Pipe => return .Pipe,
                .DoublePipe => return .DoublePipe,
                .Ampersand => return .Ampersand,
                .DoubleAmpersand => return .DoubleAmpersand,
                .Redirect => |r| return .{ .Redirect = r },
                .Dollar => return .Dollar,
                .Asterisk => return .Asterisk,
                .DoubleAsterisk => return .DoubleAsterisk,
                .Eq => return .Eq,
                .Semicolon => return .Semicolon,
                .Newline => return .Newline,
                .BraceBegin => return .BraceBegin,
                .Comma => return .Comma,
                .BraceEnd => return .BraceEnd,
                .CmdSubstBegin => return .CmdSubstBegin,
                .CmdSubstQuoted => return .CmdSubstQuoted,
                .CmdSubstEnd => return .CmdSubstEnd,
                .OpenParen => return .OpenParen,
                .CloseParen => return .CloseParen,
                .DoubleBracketOpen => return .DoubleBracketOpen,
                .DoubleBracketClose => return .DoubleBracketClose,
                .Delimit => return .Delimit,
                .Eof => return .Eof,
            }
        }
    };
};

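/// Builds the shell script source for a tagged template literal: the string
/// parts are appended to `out_script`, and each interpolated value is handled
/// by `handleTemplateValue`.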
pub fn shellCmdFromJS(
    globalThis: *jsc.JSGlobalObject,
    string_args: JSValue,
    template_args: *jsc.JSArrayIterator,
    out_jsobjs: *std.array_list.Managed(JSValue),
    jsstrings: *std.array_list.Managed(bun.String),
    out_script: *std.array_list.Managed(u8),
    marked_argument_buffer: *jsc.MarkedArgumentBuffer,
) bun.JSError!void {
    var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
    var jsobjref_buf: [128]u8 = [_]u8{0} ** 128;

    var string_iter = try string_args.arrayIterator(globalThis);
    var i: u32 = 0;
    const last = string_iter.len -| 1;
    while (try string_iter.next()) |js_value| {
        defer i += 1;
        if (!try builder.appendJSValueStr(js_value, false)) {
            return globalThis.throw("Shell script string contains invalid UTF-16", .{});
        }
        // const str = js_value.getZigString(globalThis);
        // try script.appendSlice(str.full());
        if (i < last) {
            const template_value = try template_args.next() orelse {
                return globalThis.throw("Shell script is missing JSValue arg", .{});
            };
            try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsstrings, jsobjref_buf[0..], marked_argument_buffer);
        }
    }
    return;
}

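/// Appends a single interpolated template value to the script source.
/// ArrayBuffers, Blobs without a file path, ReadableStreams, and Responses are
/// collected into `out_jsobjs` and referenced by a `LEX_JS_OBJREF_PREFIX`
/// token; strings, arrays, primitives, and objects with a `raw` property or a
/// `toString` implementation are stringified inline.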
pub fn handleTemplateValue(
    globalThis: *jsc.JSGlobalObject,
    template_value: JSValue,
    out_jsobjs: *std.array_list.Managed(JSValue),
    out_script: *std.array_list.Managed(u8),
    jsstrings: *std.array_list.Managed(bun.String),
    jsobjref_buf: []u8,
    marked_argument_buffer: *jsc.MarkedArgumentBuffer,
) bun.JSError!void {
    var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
    if (template_value != .zero) {
        if (template_value.asArrayBuffer(globalThis)) |array_buffer| {
            _ = array_buffer;
            const idx = out_jsobjs.items.len;
            marked_argument_buffer.append(template_value);
            try out_jsobjs.append(template_value);
            const slice = std.fmt.bufPrint(jsobjref_buf[0..], "{s}{d}", .{ LEX_JS_OBJREF_PREFIX, idx }) catch return globalThis.throwOutOfMemory();
            try out_script.appendSlice(slice);
            return;
        }

        if (template_value.as(jsc.WebCore.Blob)) |blob| {
            if (blob.store) |store| {
                if (store.data == .file) {
                    if (store.data.file.pathlike == .path) {
                        const path = store.data.file.pathlike.path.slice();
                        if (!try builder.appendUTF8(path, true)) {
                            return globalThis.throw("Shell script string contains invalid UTF-16", .{});
                        }
                        return;
                    }
                }
            }

            const idx = out_jsobjs.items.len;
            marked_argument_buffer.append(template_value);
            try out_jsobjs.append(template_value);
            const slice = std.fmt.bufPrint(jsobjref_buf[0..], "{s}{d}", .{ LEX_JS_OBJREF_PREFIX, idx }) catch return globalThis.throwOutOfMemory();
            try out_script.appendSlice(slice);
            return;
        }

        if (try jsc.WebCore.ReadableStream.fromJS(template_value, globalThis)) |rstream| {
            _ = rstream;

            const idx = out_jsobjs.items.len;
            marked_argument_buffer.append(template_value);
            try out_jsobjs.append(template_value);
            const slice = std.fmt.bufPrint(jsobjref_buf[0..], "{s}{d}", .{ LEX_JS_OBJREF_PREFIX, idx }) catch return globalThis.throwOutOfMemory();
            try out_script.appendSlice(slice);
            return;
        }

        if (template_value.as(jsc.WebCore.Response)) |req| {
            _ = req;

            const idx = out_jsobjs.items.len;
            marked_argument_buffer.append(template_value);
            try out_jsobjs.append(template_value);
            const slice = std.fmt.bufPrint(jsobjref_buf[0..], "{s}{d}", .{ LEX_JS_OBJREF_PREFIX, idx }) catch return globalThis.throwOutOfMemory();
            try out_script.appendSlice(slice);
            return;
        }

        if (template_value.isString()) {
            if (!try builder.appendJSValueStr(template_value, true)) {
                return globalThis.throw("Shell script string contains invalid UTF-16", .{});
            }
            return;
        }

        if (template_value.jsType().isArray()) {
            var array = try template_value.arrayIterator(globalThis);
            const last = array.len -| 1;
            var i: u32 = 0;
            while (try array.next()) |arr| : (i += 1) {
                try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsstrings, jsobjref_buf, marked_argument_buffer);
                if (i < last) {
                    const str = bun.String.static(" ");
                    if (!try builder.appendBunStr(str, false)) {
                        return globalThis.throw("Shell script string contains invalid UTF-16", .{});
                    }
                }
            }
            return;
        }

        if (template_value.isObject()) {
            if (try template_value.getOwnTruthy(globalThis, "raw")) |maybe_str| {
                const bunstr = try maybe_str.toBunString(globalThis);
                defer bunstr.deref();
                if (!try builder.appendBunStr(bunstr, false)) {
                    return globalThis.throw("Shell script string contains invalid UTF-16", .{});
                }
                return;
            }
        }

        if (template_value.isPrimitive()) {
            if (!try builder.appendJSValueStr(template_value, true)) {
                return globalThis.throw("Shell script string contains invalid UTF-16", .{});
            }
            return;
        }

        if (try template_value.implementsToString(globalThis)) {
            if (!try builder.appendJSValueStr(template_value, true)) {
                return globalThis.throw("Shell script string contains invalid UTF-16", .{});
            }
            return;
        }

        return globalThis.throw("Invalid JS object used in shell: {f}, you might need to call `.toString()` on it", .{template_value.fmtString(globalThis)});
    }

    return;
}

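/// Appends JS strings to the shell script source (`outbuf`), validating the
/// encoding and deferring strings that contain special characters to
/// `jsstrs_to_escape` via `LEX_JS_STRING_PREFIX` references.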
pub const ShellSrcBuilder = struct {
    globalThis: *jsc.JSGlobalObject,
    outbuf: *std.array_list.Managed(u8),
    jsstrs_to_escape: *std.array_list.Managed(bun.String),
    jsstr_ref_buf: [128]u8 = [_]u8{0} ** 128,

    pub fn init(
        globalThis: *jsc.JSGlobalObject,
        outbuf: *std.array_list.Managed(u8),
        jsstrs_to_escape: *std.array_list.Managed(bun.String),
    ) ShellSrcBuilder {
        return .{
            .globalThis = globalThis,
            .outbuf = outbuf,
            .jsstrs_to_escape = jsstrs_to_escape,
        };
    }

    pub fn appendJSValueStr(this: *ShellSrcBuilder, jsval: JSValue, comptime allow_escape: bool) bun.JSError!bool {
        const bunstr = try jsval.toBunString(this.globalThis);
        defer bunstr.deref();

        return try this.appendBunStr(bunstr, allow_escape);
    }

    pub fn appendBunStr(
        this: *ShellSrcBuilder,
        bunstr: bun.String,
        comptime allow_escape: bool,
    ) bun.OOM!bool {
        const invalid = (bunstr.isUTF16() and !bun.simdutf.validate.utf16le(bunstr.utf16())) or (bunstr.isUTF8() and !bun.simdutf.validate.utf8(bunstr.byteSlice()));
        if (invalid) return false;
        if (allow_escape) {
            if (needsEscapeBunstr(bunstr)) {
                try this.appendJSStrRef(bunstr);
                return true;
            }
        }
        if (bunstr.isUTF16()) {
            try this.appendUTF16Impl(bunstr.utf16());
            return true;
        }
        if (bunstr.isUTF8() or bun.strings.isAllASCII(bunstr.byteSlice())) {
            try this.appendUTF8Impl(bunstr.byteSlice());
            return true;
        }
        try this.appendLatin1Impl(bunstr.byteSlice());
        return true;
    }

    pub fn appendUTF8(this: *ShellSrcBuilder, utf8: []const u8, comptime allow_escape: bool) !bool {
        const invalid = bun.simdutf.validate.utf8(utf8);
        if (!invalid) return false;
        if (allow_escape) {
            if (needsEscapeUtf8AsciiLatin1(utf8)) {
                const bunstr = bun.String.cloneUTF8(utf8);
                defer bunstr.deref();
                try this.appendJSStrRef(bunstr);
                return true;
            }
        }

        try this.appendUTF8Impl(utf8);
        return true;
    }

    pub fn appendUTF16Impl(this: *ShellSrcBuilder, utf16: []const u16) !void {
        const size = bun.simdutf.simdutf__utf8_length_from_utf16le(utf16.ptr, utf16.len);
        try this.outbuf.ensureUnusedCapacity(size);
        try bun.strings.convertUTF16ToUTF8Append(this.outbuf, utf16);
    }

    pub fn appendUTF8Impl(this: *ShellSrcBuilder, utf8: []const u8) !void {
        try this.outbuf.appendSlice(utf8);
    }

    pub fn appendLatin1Impl(this: *ShellSrcBuilder, latin1: []const u8) !void {
        const non_ascii_idx = bun.strings.firstNonASCII(latin1) orelse 0;

        if (non_ascii_idx > 0) {
            try this.appendUTF8Impl(latin1[0..non_ascii_idx]);
        }

        this.outbuf.* = try bun.strings.allocateLatin1IntoUTF8WithList(this.outbuf.*, this.outbuf.items.len, latin1);
    }

    pub fn appendJSStrRef(this: *ShellSrcBuilder, bunstr: bun.String) bun.OOM!void {
        const idx = this.jsstrs_to_escape.items.len;
        const str = std.fmt.bufPrint(this.jsstr_ref_buf[0..], "{s}{d}", .{ LEX_JS_STRING_PREFIX, idx }) catch {
            @panic("Impossible");
        };
        try this.outbuf.appendSlice(str);
        bunstr.ref();
        try this.jsstrs_to_escape.append(bunstr);
    }
};

/// Characters that need to be escaped
const SPECIAL_CHARS = [_]u8{ '~', '[', ']', '#', ';', '\n', '*', '{', ',', '}', '`', '$', '=', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '|', '>', '<', '&', '\'', '"', ' ', '\\' };
const SPECIAL_CHARS_TABLE: bun.bit_set.IntegerBitSet(256) = brk: {
    var table = bun.bit_set.IntegerBitSet(256).initEmpty();
    for (SPECIAL_CHARS) |c| {
        table.set(c);
    }
    break :brk table;
};
pub fn assertSpecialChar(comptime c: u8) void {
    bun.assert(SPECIAL_CHARS_TABLE.isSet(c));
}
/// Characters that need to be backslashed inside double quotes
const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };

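/// Backslash-escapes `bunstr` into `outbuf` (optionally wrapping it in double
/// quotes). Returns false if the string contains invalid UTF-16.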
pub fn escapeBunStr(bunstr: bun.String, outbuf: *std.array_list.Managed(u8), comptime add_quotes: bool) bun.OOM!bool {
    if (bunstr.isUTF16()) {
        const res = try escapeUtf16(bunstr.utf16(), outbuf, add_quotes);
        return !res.is_invalid;
    }
    // otherwise should be utf-8, latin-1, or ascii
    try escape8Bit(bunstr.byteSlice(), outbuf, add_quotes);
    return true;
}

/// works for utf-8, latin-1, and ascii
pub fn escape8Bit(str: []const u8, outbuf: *std.array_list.Managed(u8), comptime add_quotes: bool) !void {
    try outbuf.ensureUnusedCapacity(str.len);

    if (add_quotes) try outbuf.append('\"');

    loop: for (str) |c| {
        inline for (BACKSLASHABLE_CHARS) |spc| {
            if (spc == c) {
                try outbuf.appendSlice(&.{
                    '\\',
                    c,
                });
                continue :loop;
            }
        }
        try outbuf.append(c);
    }

    if (add_quotes) try outbuf.append('\"');
}

pub fn escapeUtf16(str: []const u16, outbuf: *std.array_list.Managed(u8), comptime add_quotes: bool) !struct { is_invalid: bool = false } {
    if (add_quotes) try outbuf.append('"');

    const non_ascii = bun.strings.firstNonASCII16(str) orelse 0;
    var cp_buf: [4]u8 = undefined;

    var i: usize = 0;
    loop: while (i < str.len) {
        const char: u32 = brk: {
            if (i < non_ascii) {
                defer i += 1;
                break :brk str[i];
            }
            const ret = bun.strings.utf16Codepoint(str[i..]);
            if (ret.fail) return .{ .is_invalid = true };
            i += ret.len;
            break :brk ret.code_point;
        };

        inline for (BACKSLASHABLE_CHARS) |bchar| {
            if (@as(u32, @intCast(bchar)) == char) {
                try outbuf.appendSlice(&[_]u8{ '\\', @intCast(char) });
                continue :loop;
            }
        }

        const len = bun.strings.encodeWTF8RuneT(&cp_buf, u32, char);
        try outbuf.appendSlice(cp_buf[0..len]);
    }
    if (add_quotes) try outbuf.append('"');
    return .{ .is_invalid = false };
}

pub fn needsEscapeBunstr(bunstr: bun.String) bool {
    if (bunstr.isUTF16()) return needsEscapeUTF16(bunstr.utf16());
    // Otherwise is utf-8, ascii, or latin-1
    return needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
}

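/// UTF-16 variant of `needsEscapeUtf8AsciiLatin1`: checks each code unit against `SPECIAL_CHARS_TABLE`.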
pub fn needsEscapeUTF16(str: []const u16) bool {
    for (str) |codeunit| {
        if (codeunit < 0xff and SPECIAL_CHARS_TABLE.isSet(codeunit)) return true;
    }

    return false;
}

/// Checks for the presence of any char from `SPECIAL_CHARS` in `str`. This
/// indicates the *possibility* that the string must be escaped, so it can have
/// false positives, but it is faster than running the shell lexer through the
/// input string for a more correct implementation.
pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
    for (str) |c| {
        if (SPECIAL_CHARS_TABLE.isSet(c)) return true;
    }
    return false;
}

/// A list that can store its items inlined, and promote itself to a heap allocated bun.ByteList
pub fn SmolList(comptime T: type, comptime INLINED_MAX: comptime_int) type {
    return union(enum) {
        inlined: Inlined,
        heap: ByteList,

        const ByteList = bun.BabyList(T);

        pub fn initWith(val: T) @This() {
            var this: @This() = @This().zeroes;
            this.inlined.items[0] = val;
            this.inlined.len += 1;
            return this;
        }

        pub fn memoryCost(this: *const @This()) usize {
            var cost: usize = @sizeOf(@This());
            switch (this.*) {
                .inlined => |*inlined| {
                    if (comptime bun.trait.isContainer(T) and @hasDecl(T, "memoryCost")) {
                        for (inlined.slice()) |*item| {
                            cost += item.memoryCost();
                        }
                    } else {
                        cost += std.mem.sliceAsBytes(inlined.allocatedSlice()).len;
                    }
                },
                .heap => {
                    if (comptime bun.trait.isContainer(T) and @hasDecl(T, "memoryCost")) {
                        for (this.heap.slice()) |*item| {
                            cost += item.memoryCost();
                        }
                        cost += this.heap.memoryCost();
                    } else {
                        cost += std.mem.sliceAsBytes(this.heap.allocatedSlice()).len;
                    }
                },
            }

            return cost;
        }

        pub fn initWithSlice(vals: []const T) @This() {
            if (bun.Environment.allow_assert) assert(vals.len <= std.math.maxInt(u32));
            if (vals.len <= INLINED_MAX) {
                var this: @This() = @This().zeroes;
                @memcpy(this.inlined.items[0..vals.len], vals);
                this.inlined.len += @intCast(vals.len);
                return this;
            }
            var this: @This() = .{
                .heap = bun.handleOom(ByteList.initCapacity(bun.default_allocator, vals.len)),
            };
            this.heap.appendSliceAssumeCapacity(vals);
            return this;
        }

        pub fn format(this: *const @This(), writer: *std.Io.Writer) !void {
            const slc = this.slice();
            try writer.print("{}", .{slc});
        }

        pub fn jsonStringify(this: *const @This(), writer: anytype) !void {
            const slc = this.slice();
            try writer.write(slc);
        }

        pub const zeroes: @This() = .{
            .inlined = .{},
        };

        pub const Inlined = struct {
            items: [INLINED_MAX]T = undefined,
            len: u32 = 0,

            pub fn slice(this: *const Inlined) []const T {
                return this.items[0..this.len];
            }

            pub fn allocatedSlice(this: *const Inlined) []const T {
                return &this.items;
            }

            pub fn promote(this: *Inlined, n: usize, new: T) bun.BabyList(T) {
                var list = bun.handleOom(bun.BabyList(T).initCapacity(bun.default_allocator, n));
                bun.handleOom(list.appendSlice(bun.default_allocator, this.items[0..INLINED_MAX]));
                bun.handleOom(list.append(bun.default_allocator, new));
                return list;
            }

            pub fn orderedRemove(this: *Inlined, idx: usize) T {
                if (this.len - 1 == idx) return this.pop();
                const removed = this.items[idx];
                const slice_to_shift = this.items[idx + 1 .. this.len];
                std.mem.copyForwards(T, this.items[idx .. this.len - 1], slice_to_shift);
                this.len -= 1;
                return removed;
            }

            pub fn swapRemove(this: *Inlined, idx: usize) T {
                if (this.len - 1 == idx) return this.pop();

                const old_item = this.items[idx];
                this.items[idx] = this.pop();
                return old_item;
            }

            pub fn pop(this: *Inlined) T {
                const ret = this.items[this.len - 1];
                this.len -= 1;
                return ret;
            }
        };

        pub inline fn len(this: *const @This()) usize {
            return switch (this.*) {
                .inlined => this.inlined.len,
                .heap => this.heap.len,
            };
        }

        pub fn orderedRemove(this: *@This(), idx: usize) void {
            switch (this.*) {
                .heap => {
                    _ = this.heap.orderedRemove(idx);
                },
                .inlined => {
                    _ = this.inlined.orderedRemove(idx);
                },
            }
        }

        pub fn pop(this: *@This()) T {
            switch (this.*) {
                .heap => {
                    return this.heap.pop().?;
                },
                .inlined => {
                    const val = this.inlined.items[this.inlined.len - 1];
                    this.inlined.len -= 1;
                    return val;
                },
            }
        }

        pub fn swapRemove(this: *@This(), idx: usize) void {
            switch (this.*) {
                .heap => {
                    _ = this.heap.swapRemove(idx);
                },
                .inlined => {
                    _ = this.inlined.swapRemove(idx);
                },
            }
        }

        pub fn truncate(this: *@This(), starting_idx: usize) void {
            switch (this.*) {
                .inlined => {
                    if (starting_idx >= this.inlined.len) return;
                    const slice_to_move = this.inlined.items[starting_idx..this.inlined.len];
                    bun.copy(T, this.inlined.items[0..starting_idx], slice_to_move);
                    this.inlined.len = @intCast(slice_to_move.len);
                },
                .heap => {
                    const slc = this.heap.ptr[starting_idx..this.heap.len];
                    bun.copy(T, this.heap.ptr[0..slc.len], slc);
                    this.heap.len = @intCast(slc.len);
                },
            }
        }

        pub inline fn sliceMutable(this: *@This()) []T {
            return switch (this.*) {
                .inlined => {
                    if (this.inlined.len == 0) return &[_]T{};
                    return this.inlined.items[0..this.inlined.len];
                },
                .heap => {
                    if (this.heap.len == 0) return &[_]T{};
                    return this.heap.slice();
                },
            };
        }

        pub inline fn slice(this: *const @This()) []const T {
            return switch (this.*) {
                .inlined => {
                    if (this.inlined.len == 0) return &[_]T{};
                    return this.inlined.items[0..this.inlined.len];
                },
                .heap => {
                    if (this.heap.len == 0) return &[_]T{};
                    return this.heap.slice();
                },
            };
        }

        pub inline fn get(this: *@This(), idx: usize) *T {
            return switch (this.*) {
                .inlined => {
                    if (bun.Environment.allow_assert) {
                        if (idx >= this.inlined.len) @panic("Index out of bounds");
                    }
                    return &this.inlined.items[idx];
                },
                .heap => &this.heap.ptr[idx],
            };
        }

        pub inline fn getConst(this: *const @This(), idx: usize) *const T {
            return switch (this.*) {
                .inlined => {
                    if (bun.Environment.allow_assert) {
                        if (idx >= this.inlined.len) @panic("Index out of bounds");
                    }
                    return &this.inlined.items[idx];
                },
                .heap => &this.heap.ptr[idx],
            };
        }

        pub fn append(this: *@This(), new: T) void {
            switch (this.*) {
                .inlined => {
                    if (this.inlined.len == INLINED_MAX) {
                        const promoted = this.inlined.promote(INLINED_MAX, new);
                        this.* = .{ .heap = promoted };
                        return;
                    }
                    this.inlined.items[this.inlined.len] = new;
                    this.inlined.len += 1;
                },
                .heap => {
                    bun.handleOom(this.heap.append(bun.default_allocator, new));
                },
            }
        }

        pub fn clearRetainingCapacity(this: *@This()) void {
            switch (this.*) {
                .inlined => {
                    this.inlined.len = 0;
                },
                .heap => {
                    this.heap.clearRetainingCapacity();
                },
            }
        }

        pub fn last(this: *@This()) ?*T {
            if (this.len() == 0) return null;
            return this.get(this.len() - 1);
        }

        pub fn lastUnchecked(this: *@This()) *T {
            return this.get(this.len() - 1);
        }

        pub fn lastUncheckedConst(this: *const @This()) *const T {
            return this.getConst(this.len() - 1);
        }
    };
}

/// Used in JS tests, see `internal-for-testing.ts` and shell tests.
pub const TestingAPIs = struct {
    pub fn disabledOnThisPlatform(globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
        if (comptime bun.Environment.isWindows) return .false;

        const arguments_ = callframe.arguments_old(1);
        var arguments = jsc.CallFrame.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
        const string = arguments.nextEat() orelse {
            return globalThis.throw("shellInternals.disabledOnPosix: expected 1 arguments, got 0", .{});
        };

        const bunstr = try string.toBunString(globalThis);
        defer bunstr.deref();
        const utf8str = bunstr.toUTF8(bun.default_allocator);
        defer utf8str.deinit();

        inline for (Interpreter.Builtin.Kind.DISABLED_ON_POSIX) |disabled| {
            if (bun.strings.eqlComptime(utf8str.byteSlice(), @tagName(disabled))) {
                return .true;
            }
        }
        return .false;
    }

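    /// Lexes a shell template (strings plus interpolated values) and returns the
    /// resulting tokens serialized as JSON, for use from JS tests.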
    pub const shellLex = jsc.MarkedArgumentBuffer.wrap(shellLexImpl);

    fn shellLexImpl(
        globalThis: *jsc.JSGlobalObject,
        callframe: *jsc.CallFrame,
        marked_argument_buffer: *jsc.MarkedArgumentBuffer,
    ) bun.JSError!jsc.JSValue {
        const arguments_ = callframe.arguments_old(2);
        var arguments = jsc.CallFrame.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
        const string_args = arguments.nextEat() orelse {
            return globalThis.throw("shell_parse: expected 2 arguments, got 0", .{});
        };

        var arena = std.heap.ArenaAllocator.init(bun.default_allocator);
        defer arena.deinit();

        const template_args_js = arguments.nextEat() orelse {
            return globalThis.throw("shell: expected 2 arguments, got 0", .{});
        };
        var template_args = try template_args_js.arrayIterator(globalThis);
        var stack_alloc = std.heap.stackFallback(@sizeOf(bun.String) * 4, arena.allocator());
        var jsstrings = try std.array_list.Managed(bun.String).initCapacity(stack_alloc.get(), 4);
        defer {
            for (jsstrings.items[0..]) |bunstr| {
                bunstr.deref();
            }
            jsstrings.deinit();
        }
        var jsobjs = std.array_list.Managed(JSValue).init(arena.allocator());
        defer jsobjs.deinit();

        var script = std.array_list.Managed(u8).init(arena.allocator());
        try shellCmdFromJS(globalThis, string_args, &template_args, &jsobjs, &jsstrings, &script, marked_argument_buffer);

        const lex_result = brk: {
            if (bun.strings.isAllASCII(script.items[0..])) {
                var lexer = LexerAscii.new(arena.allocator(), script.items[0..], jsstrings.items[0..]);
                lexer.lex() catch |err| {
                    return globalThis.throwError(err, "failed to lex shell");
                };
                break :brk lexer.get_result();
            }
            var lexer = LexerUnicode.new(arena.allocator(), script.items[0..], jsstrings.items[0..]);
            lexer.lex() catch |err| {
                return globalThis.throwError(err, "failed to lex shell");
            };
            break :brk lexer.get_result();
        };

        if (lex_result.errors.len > 0) {
            const str = lex_result.combineErrors(arena.allocator());
            return globalThis.throwPretty("{s}", .{str});
        }

        var test_tokens = try std.array_list.Managed(Test.TestToken).initCapacity(arena.allocator(), lex_result.tokens.len);
        for (lex_result.tokens) |tok| {
            const test_tok = Test.TestToken.from_real(tok, lex_result.strpool);
            try test_tokens.append(test_tok);
        }

        const str = bun.handleOom(std.fmt.allocPrint(globalThis.bunVM().allocator, "{f}", .{std.json.fmt(test_tokens.items[0..], .{})}));

        defer globalThis.bunVM().allocator.free(str);
        var bun_str = bun.String.fromBytes(str);
        return bun_str.toJS(globalThis);
    }

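    /// Parses a shell template into its AST and returns it serialized as JSON,
    /// for use from JS tests.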
    pub const shellParse = jsc.MarkedArgumentBuffer.wrap(shellParseImpl);

    fn shellParseImpl(
        globalThis: *jsc.JSGlobalObject,
        callframe: *jsc.CallFrame,
        marked_argument_buffer: *jsc.MarkedArgumentBuffer,
    ) bun.JSError!jsc.JSValue {
        const arguments_ = callframe.arguments_old(2);
        var arguments = jsc.CallFrame.ArgumentsSlice.init(globalThis.bunVM(), arguments_.slice());
        const string_args = arguments.nextEat() orelse {
            return globalThis.throw("shell_parse: expected 2 arguments, got 0", .{});
        };

        var arena = bun.ArenaAllocator.init(bun.default_allocator);
        defer arena.deinit();

        const template_args_js = arguments.nextEat() orelse {
            return globalThis.throw("shell: expected 2 arguments, got 0", .{});
        };
        var template_args = try template_args_js.arrayIterator(globalThis);
        var stack_alloc = std.heap.stackFallback(@sizeOf(bun.String) * 4, arena.allocator());
        var jsstrings = try std.array_list.Managed(bun.String).initCapacity(stack_alloc.get(), 4);
        defer {
            for (jsstrings.items[0..]) |bunstr| {
                bunstr.deref();
            }
            jsstrings.deinit();
        }
        var jsobjs = std.array_list.Managed(JSValue).init(arena.allocator());
        defer jsobjs.deinit();
        var script = std.array_list.Managed(u8).init(arena.allocator());
        try shellCmdFromJS(globalThis, string_args, &template_args, &jsobjs, &jsstrings, &script, marked_argument_buffer);

        var out_parser: ?Parser = null;
        var out_lex_result: ?LexResult = null;

        const script_ast = Interpreter.parse(arena.allocator(), script.items[0..], jsobjs.items[0..], jsstrings.items[0..], &out_parser, &out_lex_result) catch |err| {
            if (err == ParseError.Lex) {
                if (bun.Environment.allow_assert) assert(out_lex_result != null);
                const str = out_lex_result.?.combineErrors(arena.allocator());
                return globalThis.throwPretty("{s}", .{str});
            }

            if (out_parser) |*p| {
                const errstr = p.combineErrors();
                return globalThis.throwPretty("{s}", .{errstr});
            }

            return globalThis.throwError(err, "failed to lex/parse shell");
        };

        const str = bun.handleOom(std.fmt.allocPrint(globalThis.bunVM().allocator, "{f}", .{std.json.fmt(script_ast, .{})}));

        defer globalThis.bunVM().allocator.free(str);
        return bun.String.createUTF8ForJS(globalThis, str);
    }
};

pub const ShellSubprocess = @import("./subproc.zig").ShellSubprocess;

const Syscall = @import("../sys.zig");
const builtin = @import("builtin");
const std = @import("std");
const Allocator = std.mem.Allocator;
const ArrayList = std.array_list.Managed;

const bun = @import("bun");
const assert = bun.assert;

const jsc = bun.jsc;
const JSGlobalObject = bun.jsc.JSGlobalObject;
const JSValue = bun.jsc.JSValue;

const CodepointIterator = bun.strings.UnsignedCodepointIterator;
const isAllAscii = bun.strings.isAllASCII;