mirror of
https://github.com/oven-sh/bun
synced 2026-02-10 02:48:50 +00:00
2742 lines
93 KiB
Zig
2742 lines
93 KiB
Zig
const bun = @import("root").bun;
|
|
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
const Arena = std.heap.ArenaAllocator;
|
|
const Allocator = std.mem.Allocator;
|
|
const ArrayList = std.ArrayList;
|
|
const JSC = bun.JSC;
|
|
const JSValue = bun.JSC.JSValue;
|
|
const JSPromise = bun.JSC.JSPromise;
|
|
const JSGlobalObject = bun.JSC.JSGlobalObject;
|
|
const Which = @import("../which.zig");
|
|
const Braces = @import("./braces.zig");
|
|
const Syscall = @import("../sys.zig");
|
|
const Glob = @import("../glob.zig");
|
|
const ResolvePath = @import("../resolver/resolve_path.zig");
|
|
const DirIterator = @import("../bun.js/node/dir_iterator.zig");
|
|
const CodepointIterator = @import("../string_immutable.zig").PackedCodepointIterator;
|
|
const isAllAscii = @import("../string_immutable.zig").isAllASCII;
|
|
const TaggedPointerUnion = @import("../tagged_pointer.zig").TaggedPointerUnion;
|
|
|
|
pub const eval = @import("./interpreter.zig");
|
|
pub const interpret = @import("./interpreter.zig");
|
|
pub const subproc = @import("./subproc.zig");
|
|
|
|
pub const EnvMap = interpret.EnvMap;
|
|
pub const EnvStr = interpret.EnvStr;
|
|
pub const Interpreter = eval.Interpreter;
|
|
pub const InterpreterMini = eval.InterpreterMini;
|
|
pub const Subprocess = subproc.ShellSubprocess;
|
|
pub const SubprocessMini = subproc.ShellSubprocessMini;
|
|
|
|
const GlobWalker = Glob.GlobWalker_(null, true);
|
|
// const GlobWalker = Glob.BunGlobWalker;
|
|
|
|
pub const SUBSHELL_TODO_ERROR = "Subshells are not implemented, please open GitHub issue.";
|
|
|
|
/// Error value carried around by the shell.
///
/// The strings in this type are allocated with event loop ctx allocator
pub const ShellErr = union(enum) {
    sys: JSC.SystemError,
    custom: []const u8,
    invalid_arguments: struct { val: []const u8 = "" },
    todo: []const u8,

    /// Wraps a syscall error by converting it with `toSystemError()`.
    pub fn newSys(e: Syscall.Error) @This() {
        return .{ .sys = e.toSystemError() };
    }

    /// Throws this error on `globalThis`; each variant uses its own throw helper.
    pub fn throwJS(this: @This(), globalThis: *JSC.JSGlobalObject) void {
        switch (this) {
            .sys => |sys_err| {
                globalThis.throwValue(sys_err.toErrorInstance(globalThis));
            },
            .custom => |message| {
                var zig_str = JSC.ZigString.init(message);
                zig_str.markUTF8();
                const instance = zig_str.toErrorInstance(globalThis);
                // The message buffer is not freed here.
                globalThis.vm().throwError(globalThis, instance);
            },
            .invalid_arguments => |invalid| {
                globalThis.throwInvalidArguments("{s}", .{invalid.val});
            },
            .todo => |message| {
                globalThis.throwTODO(message);
            },
        }
    }

    /// Prints this error with `bun.Output.prettyErrorln` and then calls
    /// `bun.Global.exit(1)`.
    pub fn throwMini(this: @This()) void {
        switch (this) {
            .sys => |sys_err| {
                const detail = std.fmt.allocPrint(bun.default_allocator, "bunsh: {s}: {}", .{ sys_err.message, sys_err.path }) catch bun.outOfMemory();
                bun.Output.prettyErrorln("<r><red>error<r>: Failed to due to error <b>{s}<r>", .{detail});
            },
            .custom => |message| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed to due to error <b>{s}<r>", .{message});
            },
            .invalid_arguments => |invalid| {
                const detail = std.fmt.allocPrint(bun.default_allocator, "bunsh: invalid arguments: {s}", .{invalid.val}) catch bun.outOfMemory();
                bun.Output.prettyErrorln("<r><red>error<r>: Failed to due to error <b>{s}<r>", .{detail});
            },
            .todo => |message| {
                bun.Output.prettyErrorln("<r><red>error<r>: Failed to due to error <b>TODO: {s}<r>", .{message});
            },
        }
        // Every variant ends the process with the same exit code.
        bun.Global.exit(1);
    }

    /// Frees the string owned by `custom` and `todo`. `sys` and
    /// `invalid_arguments` are left untouched.
    pub fn deinit(this: @This(), allocator: Allocator) void {
        switch (this) {
            .custom, .todo => |owned| allocator.free(owned),
            .sys, .invalid_arguments => {},
        }
    }
};
|
|
|
|
/// Returns a tagged union holding either a `T` (`result`) or a `ShellErr` (`err`).
pub fn Result(comptime T: anytype) type {
    return union(enum) {
        result: T,
        err: ShellErr,

        /// A `result` value whose payload is `std.mem.zeroes(T)`.
        pub const success: @This() = .{ .result = std.mem.zeroes(T) };
    };
}
|
|
|
|
/// Shell-level error codes.
pub const ShellError = error{
    Init,
    Process,
    GlobalThisThrown,
    Spawn,
};

/// Error codes returned by the parser.
pub const ParseError = error{ Expected, Unknown, Lex };
|
|
|
|
extern "C" fn setenv(name: [*:0]const u8, value: [*:0]const u8, overwrite: i32) i32;

/// Sets `name` to `value` through the C `setenv`, passing a non-zero
/// `overwrite` argument. The return code is discarded.
fn setEnv(name: [*:0]const u8, value: [*:0]const u8) void {
    // TODO: windows
    const overwrite_existing: i32 = 1;
    _ = setenv(name, value, overwrite_existing);
}
|
|
|
|
/// [0] => read end
|
|
/// [1] => write end
|
|
pub const Pipe = [2]bun.FileDescriptor;
|
|
|
|
const log = bun.Output.scoped(.SHELL, false);
|
|
const logsys = bun.Output.scoped(.SYS, false);
|
|
|
|
/// Accessors and error builders for a shell running on a `JSC.JSGlobalObject`.
pub const GlobalJS = struct {
    globalThis: *JSC.JSGlobalObject,

    pub inline fn init(g: *JSC.JSGlobalObject) GlobalJS {
        return .{ .globalThis = g };
    }

    /// The VM that owns `globalThis`.
    pub inline fn eventLoopCtx(this: @This()) *JSC.VirtualMachine {
        return this.globalThis.bunVM();
    }

    /// Allocator of the VM that owns `globalThis`.
    pub inline fn allocator(this: @This()) Allocator {
        return this.eventLoopCtx().allocator;
    }

    /// Same allocator as `allocator()`.
    pub inline fn getAllocator(this: @This()) Allocator {
        return this.eventLoopCtx().allocator;
    }

    /// Builds (does not throw) an `invalid_arguments` error; the text is
    /// formatted with the VM allocator.
    pub inline fn throwInvalidArguments(this: @This(), comptime fmt: []const u8, args: anytype) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), fmt, args) catch bun.outOfMemory();
        return .{ .invalid_arguments = .{ .val = message } };
    }

    /// Builds (does not throw) a `todo` error holding a copy of `msg`.
    pub inline fn throwTODO(this: @This(), msg: []const u8) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), "{s}", .{msg}) catch bun.outOfMemory();
        return .{ .todo = message };
    }

    /// Converts `err` to a JS value and throws it on `globalThis`.
    pub inline fn throwError(this: @This(), err: bun.sys.Error) void {
        const js_err = err.toJSC(this.globalThis);
        this.globalThis.throwValue(js_err);
    }

    /// Builds a `custom` error whose text is the error name, a space, then `fmt`.
    pub inline fn handleError(this: @This(), err: anytype, comptime fmt: []const u8) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), "{s} " ++ fmt, .{@errorName(err)}) catch bun.outOfMemory();
        return .{ .custom = message };
    }

    /// Builds (does not throw) a `custom` error from a format string.
    pub inline fn throw(this: @This(), comptime fmt: []const u8, args: anytype) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), fmt, args) catch bun.outOfMemory();
        return .{ .custom = message };
    }

    pub inline fn createNullDelimitedEnvMap(this: @This(), alloc: Allocator) ![:null]?[*:0]u8 {
        return this.env().map.createNullDelimitedEnvMap(alloc);
    }

    /// Wraps `task` in a `JSC.ConcurrentTask` and enqueues it on the VM
    /// returned by `bunVMConcurrently()`.
    pub inline fn enqueueTaskConcurrentWaitPid(this: @This(), task: anytype) void {
        const vm = this.globalThis.bunVMConcurrently();
        const concurrent_task = JSC.ConcurrentTask.create(JSC.Task.init(task));
        vm.enqueueTaskConcurrent(concurrent_task);
    }

    pub inline fn topLevelDir(this: @This()) []const u8 {
        return this.eventLoopCtx().bundler.fs.top_level_dir;
    }

    pub inline fn env(this: @This()) *bun.DotEnv.Loader {
        return this.eventLoopCtx().bundler.env;
    }

    pub inline fn platformEventLoop(this: @This()) *JSC.PlatformEventLoop {
        const abstract_vm = JSC.AbstractVM(this.eventLoopCtx());
        return abstract_vm.platformEventLoop();
    }

    /// Throws `shellerr` on `globalThis` via `ShellErr.throwJS`.
    pub inline fn actuallyThrow(this: @This(), shellerr: bun.shell.ShellErr) void {
        shellerr.throwJS(this.globalThis);
    }
};
|
|
|
|
/// Accessors and error builders for a shell running on a `JSC.MiniEventLoop`.
pub const GlobalMini = struct {
    mini: *JSC.MiniEventLoop,

    pub inline fn init(g: *JSC.MiniEventLoop) @This() {
        return .{ .mini = g };
    }

    /// The wrapped mini event loop.
    pub inline fn eventLoopCtx(this: @This()) *JSC.MiniEventLoop {
        return this.mini;
    }

    /// Unwraps the loop's `env`; it must be set.
    pub inline fn env(this: @This()) *bun.DotEnv.Loader {
        return this.mini.env.?;
    }

    pub inline fn allocator(this: @This()) Allocator {
        return this.mini.allocator;
    }

    /// Same allocator as `allocator()`.
    pub inline fn getAllocator(this: @This()) Allocator {
        return this.mini.allocator;
    }

    /// Builds (does not throw) a `todo` error holding a copy of `msg`.
    pub inline fn throwTODO(this: @This(), msg: []const u8) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), "{s}", .{msg}) catch bun.outOfMemory();
        return .{ .todo = message };
    }

    /// Builds (does not throw) an `invalid_arguments` error from a format string.
    pub inline fn throwInvalidArguments(this: @This(), comptime fmt: []const u8, args: anytype) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), fmt, args) catch bun.outOfMemory();
        return .{ .invalid_arguments = .{ .val = message } };
    }

    /// Builds a `custom` error whose text is the error name, a space, then `fmt`.
    pub inline fn handleError(this: @This(), err: anytype, comptime fmt: []const u8) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), "{s} " ++ fmt, .{@errorName(err)}) catch bun.outOfMemory();
        return .{ .custom = message };
    }

    /// Builds (does not throw) a `custom` error from a format string.
    pub inline fn throw(this: @This(), comptime fmt: []const u8, args: anytype) bun.shell.ShellErr {
        const message = std.fmt.allocPrint(this.allocator(), fmt, args) catch bun.outOfMemory();
        return .{ .custom = message };
    }

    pub inline fn createNullDelimitedEnvMap(this: @This(), alloc: Allocator) ![:null]?[*:0]u8 {
        return this.env().map.createNullDelimitedEnvMap(alloc);
    }

    /// Allocates a `JSC.AnyTaskWithExtraContext` from `bun.default_allocator`,
    /// binds it to `task` with the "runFromMainThreadMini" callback name, and
    /// enqueues it on the mini loop.
    pub inline fn enqueueTaskConcurrentWaitPid(this: @This(), task: anytype) void {
        const any_task = bun.default_allocator.create(JSC.AnyTaskWithExtraContext) catch bun.outOfMemory();
        _ = any_task.from(task, "runFromMainThreadMini");
        this.mini.enqueueTaskConcurrent(any_task);
    }

    pub inline fn topLevelDir(this: @This()) []const u8 {
        return this.mini.top_level_dir;
    }

    /// Reports `shellerr` via `ShellErr.throwMini`; the receiver is unused.
    pub inline fn actuallyThrow(this: @This(), shellerr: bun.shell.ShellErr) void {
        _ = this;
        shellerr.throwMini();
    }

    pub inline fn platformEventLoop(this: @This()) *JSC.PlatformEventLoop {
        const abstract_vm = JSC.AbstractVM(this.eventLoopCtx());
        return abstract_vm.platformEventLoop();
    }
};
|
|
|
|
// const GlobalHandle = if (JSC.EventLoopKind == .js) GlobalJS else GlobalMini;
|
|
|
|
pub const AST = struct {
|
|
/// Root of a parsed script: its statements, in order.
pub const Script = struct {
    stmts: []Stmt,
};

/// One statement: the expressions it contains, in order.
pub const Stmt = struct {
    exprs: []Expr,
};
|
|
|
|
/// An expression node of the shell AST.
pub const Expr = union(Expr.Tag) {
    assign: []Assign,
    cond: *Conditional,
    pipeline: *Pipeline,
    cmd: *Cmd,
    subshell: Script,

    pub const Tag = enum { assign, cond, pipeline, cmd, subshell };

    /// Converts this expression into a `PipelineItem`.
    /// Returns null for `cond` and `pipeline`, which have no item form.
    pub fn asPipelineItem(this: *Expr) ?PipelineItem {
        switch (this.*) {
            .assign => |assigns| return .{ .assigns = assigns },
            .cmd => |cmd| return .{ .cmd = cmd },
            .subshell => |script| return .{ .subshell = script },
            .cond, .pipeline => return null,
        }
    }
};
|
|
|
|
/// Two expressions joined by a conditional operator.
pub const Conditional = struct {
    op: Op,
    left: Expr,
    right: Expr,

    /// The joining operator.
    const Op = enum { And, Or };
};
|
|
|
|
/// A pipeline: its items, in order.
pub const Pipeline = struct {
    items: []PipelineItem,
};

/// One stage of a `Pipeline`.
pub const PipelineItem = union(enum) {
    cmd: *Cmd,
    assigns: []Assign,
    subshell: Script,
};
|
|
|
|
/// Result of parsing a simple command: either a full command or a bare list
/// of assignments.
pub const CmdOrAssigns = union(CmdOrAssigns.Tag) {
    cmd: Cmd,
    assigns: []Assign,

    pub const Tag = enum { cmd, assigns };

    /// Converts this value into a `PipelineItem`.
    ///
    /// A `cmd` payload is copied into memory obtained from `alloc`; an
    /// allocation failure is routed to `bun.outOfMemory()` because the return
    /// type carries no error. An `assigns` payload is passed through as-is.
    pub fn to_pipeline_item(this: CmdOrAssigns, alloc: Allocator) PipelineItem {
        switch (this) {
            .cmd => |cmd| {
                // `try` cannot be used here: the return type is not an error union.
                const cmd_ptr = alloc.create(Cmd) catch bun.outOfMemory();
                cmd_ptr.* = cmd;
                return .{ .cmd = cmd_ptr };
            },
            .assigns => |assigns| {
                // The `PipelineItem` field is named `assigns`, unlike `Expr.assign`.
                return .{ .assigns = assigns };
            },
        }
    }

    /// Converts this value into an `Expr`.
    ///
    /// A `cmd` payload is copied into memory obtained from `alloc`; an
    /// `assigns` payload is passed through as-is. Fails only if the
    /// allocation fails.
    pub fn to_expr(this: CmdOrAssigns, alloc: Allocator) !Expr {
        switch (this) {
            .cmd => |cmd| {
                const cmd_ptr = try alloc.create(Cmd);
                cmd_ptr.* = cmd;
                return .{ .cmd = cmd_ptr };
            },
            .assigns => |assigns| {
                return .{ .assign = assigns };
            },
        }
    }
};
|
|
|
|
/// A "buffer" from a JS object can be piped from and to, and can also have
/// command output redirected into it. Only BunFile and ArrayBufferView are
/// supported.
pub const JSBuf = struct {
    idx: u32,

    /// Wraps `idx` in a `JSBuf`.
    pub fn new(idx: u32) JSBuf {
        const buf: JSBuf = .{ .idx = idx };
        return buf;
    }
};
|
|
|
|
/// A Subprocess from JS
pub const JSProc = struct {
    idx: JSValue,
};

/// A variable assignment: `label` is the name, `value` the assigned atom.
pub const Assign = struct {
    label: []const u8,
    value: Atom,

    /// Builds an `Assign` from its two parts.
    pub fn new(label: []const u8, value: Atom) Assign {
        return .{ .label = label, .value = value };
    }
};
|
|
|
|
pub const Cmd = struct {
|
|
assigns: []Assign,
|
|
name_and_args: []Atom,
|
|
redirect: RedirectFlags = .{},
|
|
redirect_file: ?Redirect = null,
|
|
|
|
/// Bit flags for redirects:
/// -  `>`  = Redirect.Stdout
/// - `1>`  = Redirect.Stdout
/// - `2>`  = Redirect.Stderr
/// - `&>`  = Redirect.Stdout | Redirect.Stderr
/// - `>>`  = Redirect.Append | Redirect.Stdout
/// - `1>>` = Redirect.Append | Redirect.Stdout
/// - `2>>` = Redirect.Append | Redirect.Stderr
/// - `&>>` = Redirect.Append | Redirect.Stdout | Redirect.Stderr
///
/// Multiple redirects and redirecting stdin are not supported yet.
pub const RedirectFlags = packed struct(u8) {
    stdin: bool = false,
    stdout: bool = false,
    stderr: bool = false,
    append: bool = false,
    __unused: u4 = 0,

    /// Flags for `<`.
    pub fn @"<"() RedirectFlags {
        return RedirectFlags{ .stdin = true };
    }

    /// Flags for `<<`.
    pub fn @"<<"() RedirectFlags {
        return RedirectFlags{ .stdin = true, .append = true };
    }

    /// Flags for `>`.
    pub fn @">"() RedirectFlags {
        return RedirectFlags{ .stdout = true };
    }

    /// Flags for `>>`.
    pub fn @">>"() RedirectFlags {
        return RedirectFlags{ .stdout = true, .append = true };
    }

    /// Flags for `&>`.
    pub fn @"&>"() RedirectFlags {
        return RedirectFlags{ .stdout = true, .stderr = true };
    }

    /// Flags for `&>>`.
    pub fn @"&>>"() RedirectFlags {
        return RedirectFlags{ .stdout = true, .stderr = true, .append = true };
    }

    /// Returns the bitwise OR of the two flag sets.
    pub fn merge(a: RedirectFlags, b: RedirectFlags) RedirectFlags {
        const combined = @as(u8, @bitCast(a)) | @as(u8, @bitCast(b));
        return @bitCast(combined);
    }
};
|
|
|
|
/// Target of a redirect: an atom or a JS buffer reference.
pub const Redirect = union(enum) {
    atom: Atom,
    jsbuf: JSBuf,
};
|
|
};
|
|
|
|
/// A word of the script: one `SimpleAtom` or a `CompoundAtom` made of several.
pub const Atom = union(Atom.Tag) {
    simple: SimpleAtom,
    compound: CompoundAtom,

    pub const Tag = enum(u8) { simple, compound };

    /// Number of simple atoms: 1 for `simple`, the slice length for `compound`.
    pub fn atomsLen(this: *const Atom) u32 {
        switch (this.*) {
            .simple => return 1,
            .compound => |compound| return @intCast(compound.atoms.len),
        }
    }

    /// Wraps a `SimpleAtom` in an `Atom`.
    pub fn new_simple(atom: SimpleAtom) Atom {
        return .{ .simple = atom };
    }

    /// True when the active variant is `compound`.
    pub fn is_compound(self: *const Atom) bool {
        return self.* == .compound;
    }

    /// True when either the glob or the brace expansion hint is set.
    pub fn has_expansions(self: *const Atom) bool {
        if (self.has_glob_expansion()) return true;
        return self.has_brace_expansion();
    }

    /// Glob hint: computed for a simple atom, read from the stored flag for a
    /// compound one.
    pub fn has_glob_expansion(self: *const Atom) bool {
        switch (self.*) {
            .simple => |simple| return simple.glob_hint(),
            .compound => |compound| return compound.glob_hint,
        }
    }

    /// Brace expansion hint: always false for a simple atom, read from the
    /// stored flag for a compound one.
    pub fn has_brace_expansion(self: *const Atom) bool {
        switch (self.*) {
            .simple => return false,
            .compound => |compound| return compound.brace_expansion_hint,
        }
    }
};
|
|
|
|
/// The smallest unit of a word.
pub const SimpleAtom = union(enum) {
    Var: []const u8,
    Text: []const u8,
    asterisk,
    double_asterisk,
    brace_begin,
    brace_end,
    comma,
    cmd_subst: struct {
        script: Script,
        quoted: bool = false,
    },

    /// True for `asterisk` and `double_asterisk`.
    pub fn glob_hint(this: SimpleAtom) bool {
        return this == .asterisk or this == .double_asterisk;
    }

    /// True for `asterisk`, `double_asterisk` and `cmd_subst`.
    pub fn mightNeedIO(this: SimpleAtom) bool {
        return this == .asterisk or this == .double_asterisk or this == .cmd_subst;
    }
};
|
|
|
|
/// Several simple atoms forming one word, plus the hints the parser recorded
/// while reading them.
pub const CompoundAtom = struct {
    atoms: []SimpleAtom,
    brace_expansion_hint: bool = false,
    glob_hint: bool = false,
};
|
|
};
|
|
|
|
pub const Parser = struct {
|
|
strpool: []const u8,
|
|
tokens: []const Token,
|
|
alloc: Allocator,
|
|
jsobjs: []JSValue,
|
|
current: u32 = 0,
|
|
errors: std.ArrayList(Error),
|
|
inside_subshell: ?SubshellKind = null,
|
|
|
|
/// Kind of nested script the parser is currently inside.
const SubshellKind = enum {
    cmd_subst,
    normal,

    /// Token tag that closes this kind of nested script.
    pub fn closing_tok(this: SubshellKind) TokenTag {
        switch (this) {
            .cmd_subst => return .CmdSubstEnd,
            .normal => return .CloseParen,
        }
    }
};

// FIXME error location
/// A parse error; currently only its message.
const Error = struct {
    msg: []const u8,
};
|
|
|
|
/// Creates a parser over the lexer's tokens and string pool.
/// The error list is initialised with `allocator`; nothing else is allocated.
pub fn new(allocator: Allocator, lex_result: LexResult, jsobjs: []JSValue) !Parser {
    const parser: Parser = .{
        .strpool = lex_result.strpool,
        .tokens = lex_result.tokens,
        .alloc = allocator,
        .jsobjs = jsobjs,
        .errors = std.ArrayList(Error).init(allocator),
    };
    return parser;
}
|
|
|
|
/// Creates a parser for a nested script of the given `kind`, starting at this
/// parser's current position and sharing its tokens, string pool and allocator.
pub fn make_subparser(this: *Parser, kind: SubshellKind) Parser {
    return .{
        .strpool = this.strpool,
        .tokens = this.tokens,
        .alloc = this.alloc,
        .jsobjs = this.jsobjs,
        .current = this.current,
        // The error list is copied by value; `continue_from_subparser` writes
        // the subparser's updated list back into the parent.
        .errors = this.errors,
        .inside_subshell = kind,
    };
}
|
|
|
|
/// Resumes this parser after `subparser` has finished: takes over its error
/// list and moves to the token after the one it stopped on (or stays there
/// when that position is already past the token list).
pub fn continue_from_subparser(this: *Parser, subparser: *Parser) void {
    const stopped_at = subparser.current;
    const past_end = stopped_at >= this.tokens.len;
    this.current = if (past_end) stopped_at else stopped_at + 1;
    this.errors = subparser.errors;
}
|
|
|
|
/// Parses the whole token stream into a script.
///
/// Subshell syntax is not supported yet, so the tokens are scanned first and
/// the first `(` or `)` found is reported as an error naming that character.
/// Returns `ParseError.Expected` in that case, otherwise whatever
/// `parse_impl` returns.
pub fn parse(self: *Parser) !AST.Script {
    // Check for subshell syntax which is not supported rn
    for (self.tokens) |tok| {
        const paren: []const u8 = switch (tok) {
            .OpenParen => "(",
            .CloseParen => ")",
            else => continue,
        };
        try self.add_error("Unexpected `{s}`, subshells are currently not supported right now. Escape the `{s}` or open a GitHub issue.", .{ paren, paren });
        return ParseError.Expected;
    }

    return try self.parse_impl();
}
|
|
|
|
/// Parses statements until the end of input or, inside a nested script, until
/// its closing token.
pub fn parse_impl(self: *Parser) !AST.Script {
    var stmts = ArrayList(AST.Stmt).init(self.alloc);

    // Nothing to parse: no tokens at all, or a lone `Eof`.
    const only_eof = self.tokens.len == 1 and self.tokens[0] == .Eof;
    if (self.tokens.len == 0 or only_eof) {
        return .{ .stmts = stmts.items };
    }

    while (true) {
        const reached_end = if (self.inside_subshell) |kind|
            self.match_any(&.{ .Eof, kind.closing_tok() })
        else
            self.match(.Eof);
        if (reached_end) break;

        try stmts.append(try self.parse_stmt());
    }

    if (self.inside_subshell) |kind| {
        _ = self.expect_any(&.{ .Eof, kind.closing_tok() });
    } else {
        _ = self.expect(.Eof);
    }

    return .{ .stmts = stmts.items };
}
|
|
|
|
/// Parses expressions until a `Semicolon`, `Newline` or `Eof` is matched (or,
/// inside a nested script, its closing token) and returns them as one statement.
pub fn parse_stmt(self: *Parser) !AST.Stmt {
    var exprs = std.ArrayList(AST.Expr).init(self.alloc);

    while (true) {
        const stmt_done = if (self.inside_subshell) |kind|
            self.match_any(&.{ .Semicolon, .Newline, .Eof, kind.closing_tok() })
        else
            self.match_any_comptime(&.{ .Semicolon, .Newline, .Eof });
        if (stmt_done) break;

        try exprs.append(try self.parse_expr());
    }

    return .{ .exprs = exprs.items };
}
|
|
|
|
/// Entry point for expression parsing; delegates to `parse_cond`.
fn parse_expr(self: *Parser) !AST.Expr {
    const expr = try self.parse_cond();
    return expr;
}
|
|
|
|
/// Parses pipelines joined by `&&` / `||`, folding them left to right into
/// `Conditional` nodes.
fn parse_cond(self: *Parser) !AST.Expr {
    var lhs = try self.parse_pipeline();

    while (self.match_any_comptime(&.{ .DoubleAmpersand, .DoublePipe })) {
        // The operator is the token that was just consumed.
        const op: AST.Conditional.Op = switch (@as(TokenTag, self.prev())) {
            .DoubleAmpersand => .And,
            .DoublePipe => .Or,
            else => unreachable,
        };

        const rhs = try self.parse_pipeline();
        const node = try self.allocate(AST.Conditional, .{ .op = op, .left = lhs, .right = rhs });
        lhs = .{ .cond = node };
    }

    return lhs;
}
|
|
|
|
/// Parses one expression and, when it is followed by `|`, the rest of the
/// pipeline. Every stage must convert to a `PipelineItem`; otherwise an error
/// is recorded and `ParseError.Expected` is returned.
fn parse_pipeline(self: *Parser) !AST.Expr {
    var expr = try self.parse_subshell();

    // Not a pipeline: hand the single expression back unchanged.
    if (self.peek() != .Pipe) return expr;

    var items = std.ArrayList(AST.PipelineItem).init(self.alloc);
    while (true) {
        const item = expr.asPipelineItem() orelse {
            try self.add_error_expected_pipeline_item(@as(AST.Expr.Tag, expr));
            return ParseError.Expected;
        };
        try items.append(item);

        if (!self.match(.Pipe)) break;
        expr = try self.parse_subshell();
    }

    const pipeline = try self.allocate(AST.Pipeline, .{ .items = items.items });
    return .{ .pipeline = pipeline };
}
|
|
|
|
/// Placeholder for when we fully support subshells.
///
/// For now this only parses a command or assignment list and converts it to
/// an expression. The intended `(` script `)` handling is not enabled.
fn parse_subshell(self: *Parser) anyerror!AST.Expr {
    const cmd_or_assigns = try self.parse_cmd_or_assigns();
    return cmd_or_assigns.to_expr(self.alloc);
}
|
|
|
|
/// Parses leading assignments, then an optional command (name, arguments and
/// at most one redirect). Returns the assignments alone when no command
/// follows them.
fn parse_cmd_or_assigns(self: *Parser) !AST.CmdOrAssigns {
    var assigns = std.ArrayList(AST.Assign).init(self.alloc);

    // Collect assignments until a statement terminator or a non-assignment word.
    while (true) {
        const at_terminator = if (self.inside_subshell) |kind|
            self.check_any(&.{ .Semicolon, .Newline, .Eof, kind.closing_tok() })
        else
            self.check_any_comptime(&.{ .Semicolon, .Newline, .Eof });
        if (at_terminator) break;

        const assign = (try self.parse_assign()) orelse break;
        try assigns.append(assign);
    }

    // A terminator right after the assignments means there is no command.
    const matched_terminator = if (self.inside_subshell) |kind|
        self.match_any(&.{ .Semicolon, .Newline, .Eof, kind.closing_tok() })
    else
        self.match_any_comptime(&.{ .Semicolon, .Newline, .Eof });
    if (matched_terminator) {
        if (assigns.items.len == 0) {
            try self.add_error("expected a command or assignment", .{});
            return ParseError.Expected;
        }
        return .{ .assigns = assigns.items };
    }

    const name = (try self.parse_atom()) orelse {
        if (assigns.items.len == 0) {
            try self.add_error("expected a command or assignment but got: \"{s}\"", .{@tagName(self.peek())});
            return ParseError.Expected;
        }
        return .{ .assigns = assigns.items };
    };

    var name_and_args = std.ArrayList(AST.Atom).init(self.alloc);
    try name_and_args.append(name);
    while (try self.parse_atom()) |arg| {
        try name_and_args.append(arg);
    }

    // TODO Parse redirects (need to update lexer to have tokens for different parts e.g. &>>)
    const redirect_flags: ?AST.Cmd.RedirectFlags = if (self.match(.Redirect)) self.prev().Redirect else null;
    const redirect_file: ?AST.Cmd.Redirect = if (redirect_flags == null) null else target: {
        if (self.match(.JSObjRef)) {
            break :target .{ .jsbuf = AST.JSBuf.new(self.prev().JSObjRef) };
        }
        const file_atom = (try self.parse_atom()) orelse {
            try self.add_error("Redirection with no file", .{});
            return ParseError.Expected;
        };
        break :target .{ .atom = file_atom };
    };
    // TODO check for multiple redirects and error

    return .{ .cmd = .{
        .assigns = assigns.items,
        .name_and_args = name_and_args.items,
        .redirect = redirect_flags orelse AST.Cmd.RedirectFlags{},
        .redirect_file = redirect_file,
    } };
}
|
|
|
|
/// Try to parse an assignment. If no assignment could be parsed then return
/// null and backtrack the parser state.
///
/// Only a `Text` token of the form `LABEL=...` qualifies, where `LABEL` is
/// non-empty and passes `isValidVarName`. When the text ends at the `=`, the
/// value is the empty string if a `Delimit` follows, otherwise the next atom
/// (a missing atom is reported as `ParseError.Expected`). When text follows
/// the `=`, that text is the value.
fn parse_assign(self: *Parser) !?AST.Assign {
    const txtrng = switch (self.peek()) {
        .Text => |range| range,
        else => return null,
    };

    // Remembered so the parser can be rewound when this is not an assignment.
    const start_idx = self.current;
    _ = self.expect(.Text);
    const txt = self.text(txtrng);

    const var_decl: ?AST.Assign = var_decl: {
        const eq_idx = hasEqSign(txt) orelse break :var_decl null;

        // If it starts with = then it's not valid assignment (e.g. `=FOO`)
        if (eq_idx == 0) break :var_decl null;

        const label = txt[0..eq_idx];
        if (!isValidVarName(label)) break :var_decl null;

        // `LABEL=` with nothing after the `=` inside this text token.
        if (eq_idx == txt.len - 1) {
            if (self.peek() == .Delimit) {
                _ = self.expect_delimit();
                break :var_decl .{
                    .label = label,
                    .value = .{ .simple = .{ .Text = "" } },
                };
            }
            const atom = (try self.parse_atom()) orelse {
                try self.add_error("Expected an atom", .{});
                return ParseError.Expected;
            };
            break :var_decl .{ .label = label, .value = atom };
        }

        // `LABEL=value`: the value is the rest of this text token.
        const txt_value = txt[eq_idx + 1 .. txt.len];
        _ = self.expect_delimit();
        break :var_decl .{
            .label = label,
            .value = .{ .simple = .{ .Text = txt_value } },
        };
    };

    if (var_decl) |vd| return vd;

    // Rollback
    self.current = start_idx;
    return null;
}
|
|
|
|
/// Parses one word into an atom. Returns null when no simple atom could be
/// read, a simple atom when exactly one was read, and a compound atom (with
/// brace and glob hints) otherwise.
fn parse_atom(self: *Parser) !?AST.Atom {
    // Room for exactly one simple atom before falling back to `self.alloc`.
    var array_alloc = std.heap.stackFallback(@sizeOf(AST.SimpleAtom), self.alloc);
    var atoms = try std.ArrayList(AST.SimpleAtom).initCapacity(array_alloc.get(), 1);

    var saw_brace_open = false;
    var saw_brace_close = false;
    var saw_comma = false;
    var saw_glob = false;

    while (true) {
        // A delimiter is consumed and ends the word; statement terminators and
        // the closing token of a nested script end it without being consumed.
        const keep_going = switch (self.peek()) {
            .Delimit => stop: {
                _ = self.expect(.Delimit);
                break :stop false;
            },
            .Eof, .Semicolon, .Newline => false,
            else => |t| !(self.inside_subshell != null and self.inside_subshell.?.closing_tok() == t),
        };
        if (!keep_going) break;

        // Whether the token after the current one ends the word.
        const next_delimits = self.delimits(self.peek_n(1));
        const peeked = self.peek();

        switch (peeked) {
            .Asterisk => {
                saw_glob = true;
                _ = self.expect(.Asterisk);
                try atoms.append(.asterisk);
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .DoubleAsterisk => {
                saw_glob = true;
                _ = self.expect(.DoubleAsterisk);
                try atoms.append(.double_asterisk);
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .BraceBegin => {
                saw_brace_open = true;
                _ = self.expect(.BraceBegin);
                try atoms.append(.brace_begin);
                // TODO in this case we know it can't possibly be the beginning of a brace expansion so maybe its faster to just change it to text here
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .BraceEnd => {
                saw_brace_close = true;
                _ = self.expect(.BraceEnd);
                try atoms.append(.brace_end);
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .Comma => {
                saw_comma = true;
                _ = self.expect(.Comma);
                try atoms.append(.comma);
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .CmdSubstBegin => {
                _ = self.expect(.CmdSubstBegin);
                const is_quoted = self.match(.CmdSubstQuoted);
                var subparser = self.make_subparser(.cmd_subst);
                const script = try subparser.parse_impl();
                try atoms.append(.{ .cmd_subst = .{
                    .script = script,
                    .quoted = is_quoted,
                } });
                self.continue_from_subparser(&subparser);
                if (self.delimits(self.peek())) {
                    _ = self.match(.Delimit);
                    // NOTE(review): `next_delimits` was computed before the
                    // substitution was parsed, so it describes the token after
                    // the opening marker, not the current position. Confirm
                    // whether this should break unconditionally.
                    if (next_delimits) break;
                }
            },
            .Text => |txtrng| {
                _ = self.expect(.Text);
                try atoms.append(.{ .Text = self.text(txtrng) });
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .Var => |txtrng| {
                _ = self.expect(.Var);
                try atoms.append(.{ .Var = self.text(txtrng) });
                if (next_delimits) {
                    _ = self.match(.Delimit);
                    break;
                }
            },
            .OpenParen, .CloseParen => {
                try self.add_error("Unexpected token: `{s}`", .{if (peeked == .OpenParen) "(" else ")"});
                return null;
            },
            else => return null,
        }
    }

    switch (atoms.items.len) {
        0 => return null,
        1 => {
            std.debug.assert(atoms.capacity == 1);
            return AST.Atom.new_simple(atoms.items[0]);
        },
        else => return .{ .compound = .{
            .atoms = atoms.items,
            .brace_expansion_hint = saw_brace_open and saw_brace_close and saw_comma,
            .glob_hint = saw_glob,
        } },
    }
}
|
|
|
|
/// Allocates a `T` with the parser's allocator, stores `val` in it and
/// returns the pointer.
fn allocate(self: *const Parser, comptime T: type, val: T) !*T {
    const slot = try self.alloc.create(T);
    slot.* = val;
    return slot;
}
|
|
|
|
/// Returns the slice of the string pool covered by `range`.
fn text(self: *const Parser, range: Token.TextRange) []const u8 {
    const pool = self.strpool;
    return pool[range.start..range.end];
}
|
|
|
|
/// Moves to the next token unless `is_at_end()` is true, then returns the
/// token before the current position.
fn advance(self: *Parser) Token {
    if (self.is_at_end()) return self.prev();
    self.current += 1;
    return self.prev();
}
|
|
|
|
/// True when the current token is `Eof` or, inside a nested script, its
/// closing token.
fn is_at_end(self: *Parser) bool {
    if (self.peek() == .Eof) return true;
    const kind = self.inside_subshell orelse return false;
    return kind.closing_tok() == self.peek();
}
|
|
|
|
/// Consumes the current token, which must have tag `toktag`; anything else is
/// treated as unreachable.
fn expect(self: *Parser, toktag: TokenTag) Token {
    std.debug.assert(toktag == @as(TokenTag, self.peek()));
    if (!self.check(toktag)) unreachable;
    return self.advance();
}
|
|
|
|
/// Consumes the current token, whose tag must be one of `toktags`; anything
/// else is treated as unreachable.
fn expect_any(self: *Parser, toktags: []const TokenTag) Token {
    const current_tag = @as(TokenTag, self.peek());
    for (toktags) |candidate| {
        if (candidate == current_tag) return self.advance();
    }
    unreachable;
}
|
|
|
|
/// True when `tok` ends a word: `Delimit`, `Semicolon`, `Newline`, `Eof` or,
/// inside a nested script, its closing token.
///
/// This is the same token set `expect_delimit` accepts, so its assertion
/// (which calls this function) holds for every token it is able to consume.
fn delimits(self: *Parser, tok: Token) bool {
    switch (tok) {
        .Delimit, .Semicolon, .Newline, .Eof => return true,
        else => {},
    }
    const kind = self.inside_subshell orelse return false;
    return tok == kind.closing_tok();
}
|
|
|
|
/// Consumes the current token, which must be `Delimit`, `Semicolon`,
/// `Newline`, `Eof` or the closing token of the enclosing nested script;
/// anything else is treated as unreachable.
fn expect_delimit(self: *Parser) Token {
    std.debug.assert(self.delimits(self.peek()));

    const is_terminator = self.check_any_comptime(&.{ .Delimit, .Semicolon, .Newline, .Eof });
    const closes_subshell = self.inside_subshell != null and self.check(self.inside_subshell.?.closing_tok());
    if (is_terminator or closes_subshell) return self.advance();

    unreachable;
}
|
|
|
|
/// Consumes the current token if its tag is `toktag` and reports whether it
/// matched.
fn match(self: *Parser, toktag: TokenTag) bool {
    if (!self.check(toktag)) return false;
    _ = self.advance();
    return true;
}
|
|
|
|
/// Like `match`, for a comptime-known list of tags: consumes the current
/// token if its tag is in `toktags`.
fn match_any_comptime(self: *Parser, comptime toktags: []const TokenTag) bool {
    if (!self.check_any_comptime(toktags)) return false;
    _ = self.advance();
    return true;
}
|
|
|
|
/// Like `match`, for a runtime list of tags: consumes the current token if
/// its tag is in `toktags`.
fn match_any(self: *Parser, toktags: []const TokenTag) bool {
    if (!self.check_any(toktags)) return false;
    _ = self.advance();
    return true;
}
|
|
|
|
/// Reports, without consuming anything, whether the current token's tag is in
/// the comptime-known list `toktags`.
fn check_any_comptime(self: *Parser, comptime toktags: []const TokenTag) bool {
    const current_tag = @as(TokenTag, self.peek());
    inline for (toktags) |candidate| {
        if (candidate == current_tag) return true;
    }
    return false;
}
|
|
|
|
/// Reports, without consuming anything, whether the current token's tag is in
/// `toktags`.
fn check_any(self: *Parser, toktags: []const TokenTag) bool {
    const current_tag = @as(TokenTag, self.peek());
    return std.mem.indexOfScalar(TokenTag, toktags, current_tag) != null;
}
|
|
|
|
/// Reports, without consuming anything, whether the current token has tag
/// `toktag`.
fn check(self: *Parser, toktag: TokenTag) bool {
    const current_tag = @as(TokenTag, self.peek());
    return current_tag == @as(TokenTag, toktag);
}
|
|
|
|
/// Returns the token at the current position without consuming it.
fn peek(self: *Parser) Token {
    const idx = self.current;
    return self.tokens[idx];
}
|
|
|
|
/// Returns the token `n` positions ahead of the current one, or the last
/// token when that position is past the end of the list.
fn peek_n(self: *Parser, n: u32) Token {
    const idx = self.current + n;
    return if (idx < self.tokens.len)
        self.tokens[idx]
    else
        self.tokens[self.tokens.len - 1];
}
|
|
|
|
/// Returns the token just before the current position.
fn prev(self: *Parser) Token {
    const idx = self.current - 1;
    return self.tokens[idx];
}
|
|
|
|
/// Concatenates every recorded error message, in order and with no separator,
/// into one buffer allocated from the parser's allocator.
pub fn combineErrors(self: *Parser) []const u8 {
    const errors = self.errors.items;

    // First pass: total length, so the buffer is allocated exactly once.
    var total: usize = 0;
    for (errors) |e| total += e.msg.len;

    const buf = self.alloc.alloc(u8, total) catch bun.outOfMemory();

    // Second pass: copy each message behind the previous one.
    var offset: usize = 0;
    for (errors) |e| {
        @memcpy(buf[offset..][0..e.msg.len], e.msg);
        offset += e.msg.len;
    }
    return buf;
}
|
|
|
|
/// Format a message with `fmt`/`args` using `self.alloc` and append it to the
/// parser's error list. Fails if formatting or the append fails.
fn add_error(self: *Parser, comptime fmt: []const u8, args: anytype) !void {
    const formatted = try std.fmt.allocPrint(self.alloc, fmt, args);
    try self.errors.append(.{
        .msg = formatted,
    });
}
|
|
|
|
/// Record an error stating that a command, assignment, or subshell was
/// expected but an expression of tag `kind` was found instead.
fn add_error_expected_pipeline_item(self: *Parser, kind: AST.Expr.Tag) !void {
    const kind_name = @tagName(kind);
    const formatted = try std.fmt.allocPrint(
        self.alloc,
        "Expected a command, assignment, or subshell but got: {s}",
        .{kind_name},
    );
    try self.errors.append(.{ .msg = formatted });
}
|
|
};
|
|
|
|
/// Discriminant for `Token`. The member order is significant (it defines the
/// underlying integer values) and is kept exactly as declared originally.
pub const TokenTag = enum {
    /// `|`
    Pipe,
    /// `||`
    DoublePipe,
    /// `&`
    Ampersand,
    /// `&&`
    DoubleAmpersand,
    /// A redirection operator such as `>`, `>>`, `<`, `2>`, `&>`
    Redirect,
    /// `$`
    Dollar,
    /// `*`
    Asterisk,
    /// `**`
    DoubleAsterisk,
    /// `=`
    Eq,
    /// `;`
    Semicolon,
    /// Unescaped newline
    Newline,
    /// `{` (brace expansion)
    BraceBegin,
    /// `,` (brace expansion)
    Comma,
    /// `}` (brace expansion)
    BraceEnd,
    /// Start of a command substitution: `$(` or an opening backtick
    CmdSubstBegin,
    /// Marker emitted right after `CmdSubstBegin` when the substitution is inside double quotes
    CmdSubstQuoted,
    /// End of a command substitution
    CmdSubstEnd,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// Variable reference; payload is a range into the string pool
    Var,
    /// Literal text; payload is a range into the string pool
    Text,
    /// Reference to a JS object by index
    JSObjRef,
    /// Word delimiter
    Delimit,
    /// End of input
    Eof,
};
|
|
|
|
/// A single lexed shell token. Text-carrying variants (`Var`, `Text`) do not
/// own their bytes; they hold a `TextRange` into the lexer's string pool.
pub const Token = union(TokenTag) {
    /// |
    Pipe,
    /// ||
    DoublePipe,
    /// &
    Ampersand,
    /// &&
    DoubleAmpersand,

    /// A redirection operator together with its decoded flags.
    Redirect: AST.Cmd.RedirectFlags,

    /// $
    Dollar,
    /// `*`
    Asterisk,
    /// `**`
    DoubleAsterisk,

    /// =
    Eq,
    /// ;
    Semicolon,
    /// \n (unescaped newline)
    Newline,

    /// `{`
    BraceBegin,
    /// `,`
    Comma,
    /// `}`
    BraceEnd,
    /// Start of a command substitution.
    CmdSubstBegin,
    /// When cmd subst is wrapped in quotes, then it should be interpreted as literal string, not word split-ed arguments to a cmd.
    /// We lose quotation context in the AST, so we don't know how to disambiguate that.
    /// So this is a quick hack to give the AST that context.
    ///
    /// This matches this shell behaviour:
    /// echo test$(echo "1 2") -> test1 2\n
    /// echo "test$(echo "1 2")" -> test1 2\n
    CmdSubstQuoted,
    /// End of a command substitution.
    CmdSubstEnd,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,

    /// Variable name, as a range into the string pool.
    Var: TextRange,
    /// Literal text, as a range into the string pool.
    Text: TextRange,
    /// Index of a referenced JS object.
    JSObjRef: u32,

    /// Word delimiter.
    Delimit,
    /// End of input.
    Eof,

    /// Half-open byte range `[start, end)` into the string pool.
    pub const TextRange = struct {
        start: u32,
        end: u32,
    };

    /// Return a short human-readable rendering of the token for diagnostics.
    ///
    /// Operator tokens are rendered as their source spelling wrapped in
    /// backticks; `Var` and `Text` return the slice of `strpool` they refer to
    /// (so the result may alias `strpool`); the remaining tokens return a
    /// fixed descriptive name.
    pub fn asHumanReadable(self: Token, strpool: []const u8) []const u8 {
        // The switch value must be returned; previously it was computed and dropped.
        return switch (self) {
            .Pipe => "`|`",
            .DoublePipe => "`||`",
            .Ampersand => "`&`",
            .DoubleAmpersand => "`&&`",
            .Redirect => "`>`",
            .Dollar => "`$`",
            .Asterisk => "`*`",
            .DoubleAsterisk => "`**`",
            .Eq => "`=`",
            .Semicolon => "`;`",
            .Newline => "`\\n`",
            .BraceBegin => "`{`",
            .Comma => "`,`",
            .BraceEnd => "`}`",
            .CmdSubstBegin => "`$(`",
            .CmdSubstQuoted => "CmdSubstQuoted",
            .CmdSubstEnd => "`)`",
            .OpenParen => "`(`",
            .CloseParen => "`)`",
            .Var => |range| strpool[range.start..range.end],
            .Text => |range| strpool[range.start..range.end],
            .JSObjRef => "JSObjRef",
            .Delimit => "Delimit",
            .Eof => "EOF",
        };
    }

    /// Print the token to stderr via `std.debug.print`. `buf` is the string
    /// pool that `Var`/`Text` ranges index into.
    pub fn debug(self: Token, buf: []const u8) void {
        switch (self) {
            .Var => |txt| {
                std.debug.print("(var) {s}\n", .{buf[txt.start..txt.end]});
            },
            .Text => |txt| {
                std.debug.print("(txt) {s}\n", .{buf[txt.start..txt.end]});
            },
            else => {
                std.debug.print("{s}\n", .{@tagName(self)});
            },
        }
    }
};
|
|
|
|
/// Lexer specialised for the `.ascii` source encoding.
pub const LexerAscii = NewLexer(.ascii);
|
|
/// Lexer specialised for the `.wtf8` source encoding.
pub const LexerUnicode = NewLexer(.wtf8);
|
|
/// Output of a lexer run: the errors it recorded, the token stream, and the
/// string pool that `Var`/`Text` token ranges index into.
pub const LexResult = struct {
    errors: []LexError,
    tokens: []const Token,
    strpool: []const u8,

    /// Concatenate all error messages, in order and without a separator, into
    /// one buffer allocated from `arena`. Aborts through `bun.outOfMemory()`
    /// if the allocation fails.
    pub fn combineErrors(this: *const LexResult, arena: Allocator) []const u8 {
        var needed: usize = 0;
        for (this.errors) |err| needed += err.msg.len;

        const joined = arena.alloc(u8, needed) catch bun.outOfMemory();
        var written: usize = 0;
        for (this.errors) |err| {
            const dest = joined[written..][0..err.msg.len];
            @memcpy(dest, err.msg);
            written += err.msg.len;
        }
        return joined;
    }
};
|
|
/// A single error recorded while lexing.
pub const LexError = struct {
    /// Human-readable message. Allocated with lexer arena.
    msg: []const u8,
};
|
|
/// Literal prefix the lexer matches (in `eat_js_obj_ref`) before a decimal
/// number to recognise a JS object reference token.
pub const LEX_JS_OBJREF_PREFIX = "$__bun_";
|
|
|
|
/// Build a shell lexer type specialised for the given source `encoding`.
///
/// The returned struct turns source text into a flat list of `Token`s plus a
/// string pool (`strpool`) that `Var`/`Text` tokens index into. Subshells and
/// command substitutions are lexed by a temporary "sublexer" that shares the
/// same output lists (see `eat_subshell`).
pub fn NewLexer(comptime encoding: StringEncoding) type {
    const Chars = ShellCharIter(encoding);
    return struct {
        /// Character source; also tracks the current quoting state.
        chars: Chars,

        /// Tell us the beginning of a "word", indexes into the string pool (`buf`)
        /// Anytime a word is added, this needs to be updated
        word_start: u32 = 0,

        /// Keeps track of the end of a "word", indexes into the string pool (`buf`),
        /// anytime characters are added to the string pool this needs to be updated
        j: u32 = 0,

        strpool: ArrayList(u8),
        tokens: ArrayList(Token),
        delimit_quote: bool = false,
        /// Non-null while lexing the inside of a subshell / command substitution.
        in_subshell: ?SubShellKind = null,
        errors: std.ArrayList(LexError),

        const SubShellKind = enum {
            /// (echo hi; echo hello)
            normal,
            /// `echo hi; echo hello`
            backtick,
            /// $(echo hi; echo hello)
            dollar,
        };

        const LexerError = error{
            OutOfMemory,
            Utf8CannotEncodeSurrogateHalf,
            Utf8InvalidStartByte,
            CodepointTooLarge,
        };

        pub const js_objref_prefix = "$__bun_";

        const State = Chars.State;

        const InputChar = Chars.InputChar;

        /// Everything needed to rewind the lexer after a failed speculative read.
        /// Tokens, errors, and string-pool contents are NOT part of the snapshot.
        const BacktrackSnapshot = struct {
            chars: Chars,
            j: u32,
            word_start: u32,
            delimit_quote: bool,
        };

        /// Create a lexer over `src`. All output lists allocate from `alloc`.
        pub fn new(alloc: Allocator, src: []const u8) @This() {
            return .{
                .chars = Chars.init(src),
                .tokens = ArrayList(Token).init(alloc),
                .strpool = ArrayList(u8).init(alloc),
                .errors = ArrayList(LexError).init(alloc),
            };
        }

        /// Expose the lexer's output lists as slices. The slices alias the
        /// lexer's lists; nothing is copied.
        pub fn get_result(self: @This()) LexResult {
            return .{
                .tokens = self.tokens.items[0..],
                .strpool = self.strpool.items[0..],
                .errors = self.errors.items[0..],
            };
        }

        /// Record a lexing error with a private copy of `msg`.
        ///
        /// The copy is made with the error list's own allocator. It must not
        /// live inside `strpool`: that list can reallocate on later appends
        /// (which would leave the message dangling), and writing to it without
        /// advancing `j` would misalign every text range produced afterwards.
        pub fn add_error(self: *@This(), msg: []const u8) void {
            const owned = self.errors.allocator.dupe(u8, msg) catch bun.outOfMemory();
            self.errors.append(.{ .msg = owned }) catch bun.outOfMemory();
        }

        /// Create a lexer for the inside of a subshell of the given `kind`.
        /// It takes over this lexer's position and output lists (copied by
        /// value), starts in the `.Normal` quoting state, and leaves
        /// `delimit_quote` at its default.
        fn make_sublexer(self: *@This(), kind: SubShellKind) @This() {
            log("[lex] make sublexer", .{});
            var sublexer: @This() = .{
                .chars = self.chars,
                .strpool = self.strpool,
                .tokens = self.tokens,
                .errors = self.errors,
                .in_subshell = kind,

                .word_start = self.word_start,
                .j = self.j,
            };
            sublexer.chars.state = .Normal;
            return sublexer;
        }

        /// Take back position and output lists from a finished `sublexer`.
        fn continue_from_sublexer(self: *@This(), sublexer: *@This()) void {
            log("[lex] drop sublexer", .{});
            self.strpool = sublexer.strpool;
            self.tokens = sublexer.tokens;
            self.errors = sublexer.errors;

            self.chars = sublexer.chars;
            self.word_start = sublexer.word_start;
            self.j = sublexer.j;
            self.delimit_quote = sublexer.delimit_quote;
        }

        /// Capture the state that `backtrack` restores.
        fn make_snapshot(self: *@This()) BacktrackSnapshot {
            return .{
                .chars = self.chars,
                .j = self.j,
                .word_start = self.word_start,
                .delimit_quote = self.delimit_quote,
            };
        }

        /// Rewind to a state previously captured by `make_snapshot`.
        fn backtrack(self: *@This(), snap: BacktrackSnapshot) void {
            self.chars = snap.chars;
            self.j = snap.j;
            self.word_start = snap.word_start;
            self.delimit_quote = snap.delimit_quote;
        }

        /// Tag of the most recently emitted token, or null if none was emitted yet.
        fn last_tok_tag(self: *@This()) ?TokenTag {
            if (self.tokens.items.len == 0) return null;
            return @as(TokenTag, self.tokens.items[self.tokens.items.len - 1]);
        }

        /// Lex the remaining input into `tokens`.
        ///
        /// For the top-level lexer this runs to end of input and appends
        /// `.Eof`. For a sublexer it returns as soon as the closing delimiter
        /// of its subshell is seen; reaching end of input first records an
        /// "Unclosed ..." error instead. Some error conditions record a
        /// message via `add_error` and return early without an `.Eof` token.
        pub fn lex(self: *@This()) LexerError!void {
            while (true) {
                const input = self.eat() orelse {
                    try self.break_word(true);
                    break;
                };
                const char = input.char;
                const escaped = input.escaped;

                // Handle non-escaped chars:
                // 1. special syntax (operators, etc.)
                // 2. lexing state switchers (quotes)
                // 3. word breakers (spaces, etc.)
                //
                // `break :escaped` means "treat this char as plain text" and
                // falls through to the string-pool append below.
                if (!escaped) escaped: {
                    switch (char) {
                        '#' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            // A comment only starts at the beginning of input or after whitespace.
                            const whitespace_preceding =
                                if (self.chars.prev) |prev|
                                Chars.isWhitespace(prev)
                            else
                                true;
                            if (!whitespace_preceding) break :escaped;
                            try self.break_word(true);
                            self.eatComment();
                            continue;
                        },
                        ';' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);
                            try self.tokens.append(.Semicolon);
                            continue;
                        },
                        '\n' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);
                            try self.tokens.append(.Newline);
                            continue;
                        },

                        // glob asterisks
                        '*' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.peek()) |next| {
                                if (!next.escaped and next.char == '*') {
                                    _ = self.eat();
                                    try self.break_word(false);
                                    try self.tokens.append(.DoubleAsterisk);
                                    continue;
                                }
                            }
                            try self.break_word(false);
                            try self.tokens.append(.Asterisk);
                            continue;
                        },

                        // brace expansion syntax
                        '{' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.BraceBegin);
                            continue;
                        },
                        ',' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.Comma);
                            continue;
                        },
                        '}' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.BraceEnd);
                            continue;
                        },

                        // Command substitution
                        '`' => {
                            if (self.chars.state == .Single) break :escaped;
                            if (self.in_subshell == .backtick) {
                                // Closing backtick of the substitution this sublexer is lexing.
                                try self.break_word(true);
                                if (self.last_tok_tag()) |toktag| {
                                    if (toktag != .Delimit) try self.tokens.append(.Delimit);
                                }
                                try self.tokens.append(.CmdSubstEnd);
                                return;
                            } else {
                                try self.eat_subshell(.backtick);
                            }
                        },
                        // Command substitution/vars
                        '$' => {
                            if (self.chars.state == .Single) break :escaped;

                            const peeked = self.peek() orelse InputChar{ .char = 0 };
                            if (!peeked.escaped and peeked.char == '(') {
                                try self.break_word(false);
                                try self.eat_subshell(.dollar);
                                continue;
                            }

                            // Handle variable
                            try self.break_word(false);
                            if (self.eat_js_obj_ref()) |ref| {
                                if (self.chars.state == .Double) {
                                    // Routed through add_error so every message comes from the same allocator.
                                    self.add_error("JS object reference not allowed in double quotes");
                                    return;
                                }
                                try self.tokens.append(ref);
                            } else {
                                const var_tok = try self.eat_var();
                                // empty var: keep the `$` as literal text
                                if (var_tok.start == var_tok.end) {
                                    try self.appendCharToStrPool('$');
                                    try self.break_word(false);
                                } else {
                                    try self.tokens.append(.{ .Var = var_tok });
                                }
                            }
                            self.word_start = self.j;
                            continue;
                        },
                        '(' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);
                            try self.eat_subshell(.normal);
                            continue;
                        },
                        ')' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.in_subshell != .dollar and self.in_subshell != .normal) {
                                self.add_error("Unexpected ')'");
                                continue;
                            }

                            try self.break_word(true);
                            if (self.last_tok_tag()) |toktag| {
                                if (toktag != .Delimit) try self.tokens.append(.Delimit);
                            }
                            if (self.in_subshell == .dollar) {
                                try self.tokens.append(.CmdSubstEnd);
                            } else if (self.in_subshell == .normal) {
                                try self.tokens.append(.CloseParen);
                            }
                            return;
                        },

                        '0'...'9' => {
                            if (self.chars.state != .Normal) break :escaped;
                            // Speculatively try to read `<fd>>` / `<fd><`; rewind if it is just a number.
                            const snapshot = self.make_snapshot();
                            if (self.eat_redirect(input)) |redirect| {
                                try self.break_word(true);
                                try self.tokens.append(.{ .Redirect = redirect });
                                continue;
                            }
                            self.backtrack(snapshot);
                            break :escaped;
                        },

                        // Operators
                        '|' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);

                            const next = self.peek() orelse {
                                self.add_error("Unexpected EOF");
                                return;
                            };
                            if (!next.escaped and next.char == '&') {
                                self.add_error("Piping stdout and stderr (`|&`) is not supported yet. Please file an issue on GitHub.");
                                return;
                            }
                            if (next.escaped or next.char != '|') {
                                try self.tokens.append(.Pipe);
                            } else if (next.char == '|') {
                                _ = self.eat() orelse unreachable;
                                try self.tokens.append(.DoublePipe);
                            }
                            continue;
                        },
                        '>' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word_impl(true, false, true);
                            const redirect = self.eat_simple_redirect(.out);
                            try self.tokens.append(.{ .Redirect = redirect });
                            continue;
                        },
                        '<' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word_impl(true, false, true);
                            const redirect = self.eat_simple_redirect(.in);
                            try self.tokens.append(.{ .Redirect = redirect });
                            continue;
                        },
                        '&' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);

                            const next = self.peek() orelse {
                                self.add_error("Unexpected EOF");
                                return;
                            };
                            if (next.char == '>' and !next.escaped) {
                                _ = self.eat();
                                const inner = if (self.eat_simple_redirect_operator(.out))
                                    AST.Cmd.RedirectFlags.@"&>>"()
                                else
                                    AST.Cmd.RedirectFlags.@"&>"();
                                try self.tokens.append(.{ .Redirect = inner });
                            } else if (next.escaped or next.char != '&') {
                                try self.tokens.append(.Ampersand);
                            } else if (next.char == '&') {
                                _ = self.eat() orelse unreachable;
                                try self.tokens.append(.DoubleAmpersand);
                            } else continue;
                        },

                        // 2. State switchers
                        '\'' => {
                            if (self.chars.state == .Single) {
                                self.chars.state = .Normal;
                                continue;
                            }
                            if (self.chars.state == .Normal) {
                                self.chars.state = .Single;
                                continue;
                            }
                            break :escaped;
                        },
                        '"' => {
                            if (self.chars.state == .Single) break :escaped;
                            if (self.chars.state == .Normal) {
                                try self.break_word(false);
                                self.chars.state = .Double;
                            } else if (self.chars.state == .Double) {
                                try self.break_word(false);
                                self.chars.state = .Normal;
                            }
                            continue;
                        },

                        // 3. Word breakers
                        ' ' => {
                            if (self.chars.state == .Normal) {
                                try self.break_word_impl(true, true, false);
                                continue;
                            }
                            break :escaped;
                        },

                        else => break :escaped,
                    }
                    continue;
                }

                try self.appendCharToStrPool(char);
            }

            // End of input reached while still inside a subshell.
            if (self.in_subshell) |subshell_kind| {
                switch (subshell_kind) {
                    .dollar, .backtick => self.add_error("Unclosed command substitution"),
                    .normal => self.add_error("Unclosed subshell"),
                }
                return;
            }

            try self.tokens.append(.Eof);
        }

        /// Append one character to the string pool and advance `j` by the
        /// number of bytes written.
        fn appendCharToStrPool(self: *@This(), char: Chars.CodepointType) !void {
            if (comptime encoding == .ascii) {
                try self.strpool.append(char);
                self.j += 1;
            } else {
                if (char <= 0x7F) {
                    try self.strpool.append(@intCast(char));
                    self.j += 1;
                    return;
                } else {
                    try self.appendUnicodeCharToStrPool(char);
                }
            }
        }

        /// Cold path of `appendCharToStrPool`: encode a non-ASCII codepoint
        /// with `bun.strings.encodeWTF8Rune` and append the resulting bytes.
        fn appendUnicodeCharToStrPool(self: *@This(), char: Chars.CodepointType) !void {
            @setCold(true);

            const ichar: i32 = @intCast(char);
            var bytes: [4]u8 = undefined;
            const n = bun.strings.encodeWTF8Rune(&bytes, ichar);
            // Advance `j` only once the bytes are actually in the pool.
            try self.strpool.appendSlice(bytes[0..n]);
            self.j += n;
        }

        /// Shorthand for `break_word_impl(add_delimiter, false, false)`.
        fn break_word(self: *@This(), add_delimiter: bool) !void {
            return try self.break_word_impl(add_delimiter, false, false);
        }

        /// Finish the word currently being accumulated.
        ///
        /// If the pending range `[word_start, j)` is non-empty, emit it as a
        /// `.Text` token, followed by `.Delimit` when `add_delimiter` is set.
        /// If it is empty but we are at a plain space or a redirect operator
        /// and the previous token is word-like, emit just `.Delimit`.
        /// In all cases the next word starts at `j`.
        fn break_word_impl(self: *@This(), add_delimiter: bool, in_normal_space: bool, in_redirect_operator: bool) !void {
            const start: u32 = self.word_start;
            const end: u32 = self.j;
            if (start != end) {
                try self.tokens.append(.{ .Text = .{ .start = start, .end = end } });
                if (add_delimiter) {
                    try self.tokens.append(.Delimit);
                }
            } else if ((in_normal_space or in_redirect_operator) and self.tokens.items.len > 0 and
                switch (self.tokens.items[self.tokens.items.len - 1]) {
                .Var, .Text, .BraceBegin, .Comma, .BraceEnd, .CmdSubstEnd => true,
                else => false,
            }) {
                try self.tokens.append(.Delimit);
                self.delimit_quote = false;
            }
            self.word_start = self.j;
        }

        const RedirectDirection = enum { out, in };

        /// Called after a bare `>` or `<` has been eaten: consume an optional
        /// repeated operator character and return the matching flags
        /// (`>`, `>>`, `<`, or `<<`).
        fn eat_simple_redirect(self: *@This(), dir: RedirectDirection) AST.Cmd.RedirectFlags {
            const is_double = self.eat_simple_redirect_operator(dir);

            if (is_double) {
                return switch (dir) {
                    .out => AST.Cmd.RedirectFlags.@">>"(),
                    .in => AST.Cmd.RedirectFlags.@"<<"(),
                };
            }

            return switch (dir) {
                .out => AST.Cmd.RedirectFlags.@">"(),
                .in => AST.Cmd.RedirectFlags.@"<"(),
            };
        }

        /// Returns true (and consumes the character) if the next unescaped
        /// character repeats the operator for `dir`, i.e. the operator is a
        /// "double one": >> or <<
        /// Returns false without consuming anything otherwise, including for
        /// the mixed forms <> and ><.
        fn eat_simple_redirect_operator(self: *@This(), dir: RedirectDirection) bool {
            if (self.peek()) |peeked| {
                if (peeked.escaped) return false;
                switch (peeked.char) {
                    '>' => {
                        if (dir == .out) {
                            _ = self.eat();
                            return true;
                        }
                        return false;
                    },
                    '<' => {
                        if (dir == .in) {
                            _ = self.eat();
                            return true;
                        }
                        return false;
                    },
                    else => return false,
                }
            }
            return false;
        }

        /// Try to lex a redirect that starts with a file-descriptor number
        /// (or `&`), given that `first` has already been eaten.
        ///
        /// Returns the decoded flags on success. Returns null when the input
        /// is not such a redirect; in that case input may already have been
        /// consumed, so the caller is responsible for backtracking.
        fn eat_redirect(self: *@This(), first: InputChar) ?AST.Cmd.RedirectFlags {
            var flags: AST.Cmd.RedirectFlags = .{};
            switch (first.char) {
                '0'...'9' => {
                    // Codepoint int casts are safe here because the digits are in the ASCII range
                    var count: usize = 1;
                    var buf: [32]u8 = [_]u8{@intCast(first.char)} ** 32;

                    while (self.peek()) |peeked| {
                        const char = peeked.char;
                        switch (char) {
                            '0'...'9' => {
                                // More digits than fit in the buffer cannot be a valid
                                // fd number; bail out before writing out of bounds.
                                if (count >= buf.len) return null;
                                _ = self.eat();
                                buf[count] = @intCast(char);
                                count += 1;
                                continue;
                            },
                            else => break,
                        }
                    }

                    const num = std.fmt.parseInt(usize, buf[0..count], 10) catch {
                        // This means the number was really large, meaning it
                        // probably was supposed to be a string
                        return null;
                    };

                    switch (num) {
                        0 => {
                            flags.stdin = true;
                        },
                        1 => {
                            flags.stdout = true;
                        },
                        2 => {
                            flags.stderr = true;
                        },
                        else => {
                            // FIXME support redirection to any arbitrary fd
                            log("redirection to fd {d} is invalid\n", .{num});
                            return null;
                        },
                    }
                },
                '&' => {
                    if (first.escaped) return null;
                    flags.stdout = true;
                    flags.stderr = true;
                    _ = self.eat();
                },
                else => return null,
            }

            var dir: RedirectDirection = .out;
            if (self.peek()) |input| {
                if (input.escaped) return null;
                switch (input.char) {
                    '>' => dir = .out,
                    '<' => dir = .in,
                    else => return null,
                }
                _ = self.eat();
            } else return null;

            const is_double = self.eat_simple_redirect_operator(dir);
            if (is_double) {
                flags.append = true;
            }

            return flags;
        }

        /// Assumes the first character of the literal has been eaten
        /// Backtracks and returns false if unsuccessful
        fn eat_literal(self: *@This(), comptime CodepointType: type, comptime literal: []const CodepointType) bool {
            const literal_skip_first = literal[1..];
            const snapshot = self.make_snapshot();
            const slice = self.eat_slice(CodepointType, literal_skip_first.len) orelse {
                self.backtrack(snapshot);
                return false;
            };

            if (std.mem.eql(CodepointType, &slice, literal_skip_first))
                return true;

            self.backtrack(snapshot);
            return false;
        }

        /// Eat a run of decimal digits and return its value.
        ///
        /// Backtracks and returns null if there are no digits, if the run is
        /// longer than the internal 32-byte buffer, or if the value does not
        /// fit in `usize`.
        ///
        /// NOTE(review): the loop uses `eat`, so the first non-digit character
        /// after the number is consumed as well — confirm this is intended.
        fn eat_number_word(self: *@This()) ?usize {
            const snap = self.make_snapshot();
            var count: usize = 0;
            var buf: [32]u8 = [_]u8{0} ** 32;

            while (self.eat()) |result| {
                const char = result.char;
                switch (char) {
                    '0'...'9' => {
                        // Refuse digit runs that would overflow the buffer.
                        if (count >= buf.len) {
                            self.backtrack(snap);
                            return null;
                        }
                        // Safe to cast here because 0-9 is in ASCII range
                        buf[count] = @intCast(char);
                        count += 1;
                        continue;
                    },
                    else => {
                        break;
                    },
                }
            }

            if (count == 0) {
                self.backtrack(snap);
                return null;
            }

            const num = std.fmt.parseInt(usize, buf[0..count], 10) catch {
                self.backtrack(snap);
                return null;
            };

            return num;
        }

        /// Emit the opening token(s) for a subshell of the given `kind`, lex
        /// its contents with a sublexer, then resume from where it stopped.
        /// For `.dollar`, the `(` following the already-eaten `$` is consumed first.
        fn eat_subshell(self: *@This(), kind: SubShellKind) !void {
            if (kind == .dollar) {
                // Eat the open paren
                _ = self.eat();
            }

            switch (kind) {
                .dollar, .backtick => {
                    try self.tokens.append(.CmdSubstBegin);
                    if (self.chars.state == .Double) {
                        try self.tokens.append(.CmdSubstQuoted);
                    }
                },
                .normal => try self.tokens.append(.OpenParen),
            }
            var sublexer = self.make_sublexer(kind);
            try sublexer.lex();
            self.continue_from_sublexer(&sublexer);
        }

        /// After a `$` has been eaten, try to read the rest of a JS object
        /// reference (`LEX_JS_OBJREF_PREFIX` followed by a number that fits in
        /// `u32`). Backtracks and returns null if the input is not one.
        fn eat_js_obj_ref(self: *@This()) ?Token {
            const snap = self.make_snapshot();
            if (self.eat_literal(u8, LEX_JS_OBJREF_PREFIX)) {
                if (self.eat_number_word()) |num| {
                    if (num <= std.math.maxInt(u32)) {
                        return .{ .JSObjRef = @intCast(num) };
                    }
                }
            }
            self.backtrack(snap);
            return null;
        }

        /// After a `$` has been eaten, consume the longest valid variable
        /// name (`[a-zA-Z_][a-zA-Z0-9_]*`), copying it into the string pool.
        ///
        /// Returns the range of the name within the string pool; the range is
        /// empty (`start == end`) when no valid name follows. Lexing stops at
        /// the first character that cannot be part of a name, which covers
        /// every operator, quote, and subshell-closing character.
        fn eat_var(self: *@This()) !Token.TextRange {
            const start = self.j;
            var is_first = true;
            while (self.peek()) |result| : (is_first = false) {
                const char = result.char;
                const is_name_char = switch (char) {
                    'a'...'z', 'A'...'Z', '_' => true,
                    // digits are allowed anywhere but the first position
                    '0'...'9' => !is_first,
                    else => false,
                };
                if (!is_name_char) break;

                _ = self.eat() orelse unreachable;
                try self.appendCharToStrPool(char);
            }
            return .{ .start = start, .end = self.j };
        }

        /// Consume and return the next input character, or null at end of input.
        fn eat(self: *@This()) ?InputChar {
            return self.chars.eat();
        }

        /// Consume input up to and including the next unescaped newline
        /// (or to end of input).
        fn eatComment(self: *@This()) void {
            while (self.eat()) |peeked| {
                if (peeked.escaped) {
                    continue;
                }
                if (peeked.char == '\n') break;
            }
        }

        /// Consume exactly `N` characters and return them as an array.
        /// Returns null if the input ends first or a character does not fit
        /// in `CodepointType`; characters eaten before that point stay eaten,
        /// so callers that need to rewind must snapshot beforehand.
        fn eat_slice(self: *@This(), comptime CodepointType: type, comptime N: usize) ?[N]CodepointType {
            var slice = [_]CodepointType{0} ** N;
            var i: usize = 0;
            while (self.peek()) |result| {
                // If we passed in codepoint range that is equal to the source
                // string, or is greater than the codepoint range of source string than an int cast
                // will not panic
                if (CodepointType == Chars.CodepointType or std.math.maxInt(CodepointType) >= std.math.maxInt(Chars.CodepointType)) {
                    slice[i] = @intCast(result.char);
                } else {
                    // Otherwise the codepoint range is smaller than the source, so we need to check that the chars are valid
                    if (result.char > std.math.maxInt(CodepointType)) {
                        return null;
                    }
                    slice[i] = @intCast(result.char);
                }

                i += 1;
                _ = self.eat();
                if (i == N) {
                    return slice;
                }
            }

            return null;
        }

        /// Return the next input character without consuming it.
        fn peek(self: *@This()) ?InputChar {
            return self.chars.peek();
        }

        /// Forward to the character iterator's `read_char`.
        fn read_char(self: *@This()) ?InputChar {
            return self.chars.read_char();
        }

        /// Print every token emitted so far to stderr, one per line with its index.
        fn debug_tokens(self: *const @This()) void {
            std.debug.print("Tokens: \n", .{});
            for (self.tokens.items, 0..) |tok, i| {
                std.debug.print("{d}: ", .{i});
                tok.debug(self.strpool.items[0..self.strpool.items.len]);
            }
        }
    };
}
|
|
|
|
/// Source encodings a lexer / character iterator can be specialised for.
/// `ShellCharIter` uses `SrcAscii` for `.ascii` and `SrcUnicode` for the others.
pub const StringEncoding = enum { ascii, wtf8, utf16 };
|
|
|
|
/// Byte-indexed character source for input consisting of 7-bit ASCII only.
const SrcAscii = struct {
    bytes: []const u8,
    /// Index of the current byte.
    i: usize,

    /// One character plus an "escaped" flag, packed into a single byte.
    const IndexValue = packed struct {
        char: u7,
        escaped: bool = false,
    };

    /// Start reading `bytes` from the first byte.
    fn init(bytes: []const u8) SrcAscii {
        return .{ .bytes = bytes, .i = 0 };
    }

    /// Character at the current position, or null past the end.
    /// The byte is cast to `u7`, so it must be below 0x80.
    inline fn index(this: *const SrcAscii) ?IndexValue {
        const pos = this.i;
        if (pos < this.bytes.len) {
            return IndexValue{ .char = @intCast(this.bytes[pos]) };
        }
        return null;
    }

    /// Character one past the current position, or null past the end.
    inline fn indexNext(this: *const SrcAscii) ?IndexValue {
        const pos = this.i + 1;
        if (pos < this.bytes.len) {
            return IndexValue{ .char = @intCast(this.bytes[pos]) };
        }
        return null;
    }

    /// Advance by one byte, or by two when the character was escaped
    /// (the backslash and the character it escapes).
    inline fn eat(this: *SrcAscii, escaped: bool) void {
        const step: usize = if (escaped) 2 else 1;
        this.i += step;
    }
};
|
|
|
|
/// Codepoint-indexed character source built on `CodepointIterator`.
/// It keeps two cursors so that both the current codepoint and the one after
/// it can be inspected without re-decoding.
const SrcUnicode = struct {
    iter: CodepointIterator,
    /// Cursor positioned on the current codepoint.
    cursor: CodepointIterator.Cursor,
    /// Cursor positioned on the codepoint after `cursor`.
    next_cursor: CodepointIterator.Cursor,

    const IndexValue = packed struct {
        char: u29,
        width: u3 = 0,
    };

    /// Advance `cursor` by one codepoint. When the iterator is exhausted the
    /// cursor is parked past the end of the input.
    fn nextCursor(iter: *const CodepointIterator, cursor: *CodepointIterator.Cursor) void {
        if (iter.next(cursor)) return;

        // This will make `i > sourceBytes.len` so the condition in `index` will fail
        cursor.i = @intCast(iter.bytes.len + 1);
        cursor.width = 1;
        cursor.c = CodepointIterator.ZeroValue;
    }

    /// Start reading `bytes`, priming both cursors.
    fn init(bytes: []const u8) SrcUnicode {
        var iter = CodepointIterator.init(bytes);

        var first = CodepointIterator.Cursor{};
        nextCursor(&iter, &first);

        var second: CodepointIterator.Cursor = first;
        nextCursor(&iter, &second);

        return .{ .iter = iter, .cursor = first, .next_cursor = second };
    }

    /// Codepoint under `cursor`, or null if that cursor lies past the end.
    inline fn valueAt(this: *const SrcUnicode, cursor: *const CodepointIterator.Cursor) ?IndexValue {
        if (cursor.width + cursor.i > this.iter.bytes.len) return null;
        return .{ .char = cursor.c, .width = cursor.width };
    }

    /// Current codepoint, or null at end of input.
    inline fn index(this: *const SrcUnicode) ?IndexValue {
        return this.valueAt(&this.cursor);
    }

    /// Codepoint after the current one, or null if there is none.
    inline fn indexNext(this: *const SrcUnicode) ?IndexValue {
        return this.valueAt(&this.next_cursor);
    }

    /// Advance by one codepoint, or by two when the character was escaped.
    inline fn eat(this: *SrcUnicode, escaped: bool) void {
        // An escaped character spans two codepoints: skip one extra first.
        if (escaped) nextCursor(&this.iter, &this.next_cursor);

        this.cursor = this.next_cursor;
        nextCursor(&this.iter, &this.next_cursor);
    }
};
|
|
|
|
/// Build a character iterator for shell source in the given `encoding`.
///
/// The iterator resolves backslash escapes according to the current quoting
/// `state` and reports each character together with whether it was escaped.
pub fn ShellCharIter(comptime encoding: StringEncoding) type {
    return struct {
        src: Src,
        /// Current quoting state; set by the user of the iterator.
        state: State = .Normal,
        /// Character returned by the `eat` call before the most recent one.
        prev: ?InputChar = null,
        /// Character returned by the most recent `eat` call.
        current: ?InputChar = null,

        pub const Src = switch (encoding) {
            .ascii => SrcAscii,
            .wtf8, .utf16 => SrcUnicode,
        };

        pub const CodepointType = if (encoding == .ascii) u7 else u32;

        pub const InputChar = if (encoding == .ascii) SrcAscii.IndexValue else struct {
            char: u32,
            escaped: bool = false,
        };

        /// True for tab, carriage return, newline, and space.
        pub fn isWhitespace(char: InputChar) bool {
            return switch (char.char) {
                '\t', '\r', '\n', ' ' => true,
                else => false,
            };
        }

        pub const State = enum {
            Normal,
            Single,
            Double,
        };

        /// Create an iterator over `bytes` in the `.Normal` state.
        pub fn init(bytes: []const u8) @This() {
            return .{
                .src = Src.init(bytes),
            };
        }

        /// Consume and return the next character (null at end of input),
        /// updating `prev` / `current`.
        pub fn eat(self: *@This()) ?InputChar {
            const result = self.read_char() orelse return null;
            self.prev = self.current;
            self.current = result;
            self.src.eat(result.escaped);
            return result;
        }

        /// Return the next character without consuming it.
        pub fn peek(self: *@This()) ?InputChar {
            return self.read_char();
        }

        /// Decode the character at the current position without consuming it.
        ///
        /// - In `.Single` state, or for any non-backslash, the character is
        ///   returned as-is and unescaped.
        /// - In `.Normal` state a backslash yields the following character,
        ///   marked escaped.
        /// - In `.Double` state a backslash only escapes `$`, a backtick, `"`,
        ///   `\`, newline, and `#`; before anything else the backslash itself
        ///   is returned unescaped.
        /// - A backslash that is the last character of the input yields null.
        pub fn read_char(self: *@This()) ?InputChar {
            const here = self.src.index() orelse return null;
            if (self.state == .Single or here.char != '\\') {
                return InputChar{ .char = here.char };
            }

            // Handle backslash
            const following = self.src.indexNext() orelse return null;
            return switch (self.state) {
                .Normal => InputChar{ .char = following.char, .escaped = true },
                .Double => switch (following.char) {
                    // Backslash only applies to these characters
                    '$', '`', '"', '\\', '\n', '#' => InputChar{ .char = following.char, .escaped = true },
                    else => InputChar{ .char = here.char, .escaped = false },
                },
                else => unreachable,
            };
        }
    };
}
|
|
|
|
/// Report whether `var_name` is a valid shell variable name.
///
/// Only these characters are allowed:
/// - a-z, A-Z
/// - _
/// - 0-9 (but can't be first char)
///
/// The empty string is not a valid name. All-ASCII input is delegated to
/// `isValidVarNameAscii`; other input is checked codepoint by codepoint.
pub fn isValidVarName(var_name: []const u8) bool {
    if (isAllAscii(var_name)) return isValidVarNameAscii(var_name);

    if (var_name.len == 0) return false;

    var iter = CodepointIterator.init(var_name);
    var cursor = CodepointIterator.Cursor{};

    var seen_any = false;
    while (iter.next(&cursor)) {
        const allowed = switch (cursor.c) {
            'a'...'z', 'A'...'Z', '_' => true,
            // digits are fine everywhere except the first position
            '0'...'9' => seen_any,
            else => false,
        };
        if (!allowed) return false;
        seen_any = true;
    }

    // No decodable codepoint at all also counts as invalid.
    return seen_any;
}
|
|
/// Report whether the ASCII string `var_name` is a valid shell variable name:
/// non-empty, first character in `[a-zA-Z_]`, remaining characters in
/// `[a-zA-Z0-9_]`.
///
/// Names longer than 16 bytes are validated 16 bytes at a time with vector
/// comparisons; the remainder (and all short names) use the scalar check.
fn isValidVarNameAscii(var_name: []const u8) bool {
    if (var_name.len == 0) return false;
    switch (var_name[0]) {
        'a'...'z', 'A'...'Z', '_' => {},
        // includes '=' and digits, which may not start a name
        else => return false,
    }

    if (var_name.len - 1 < 16)
        return isValidVarNameSlowAscii(var_name);

    const Chunk = @Vector(16, u8);
    const BoolVec = @Vector(16, u1);

    const upper_a: Chunk = @splat('A');
    const upper_z: Chunk = @splat('Z');
    const lower_a: Chunk = @splat('a');
    const lower_z: Chunk = @splat('z');
    // Digit bounds are the ASCII characters '0' and '9', not the byte values 0 and 9.
    const digit_0: Chunk = @splat('0');
    const digit_9: Chunk = @splat('9');
    const underscore: Chunk = @splat('_');

    var i: usize = 0;
    while (i + 16 <= var_name.len) : (i += 16) {
        const chars: Chunk = var_name[i..][0..16].*;

        // All range checks are inclusive so that the boundary characters
        // ('A', 'Z', 'a', 'z', '0', '9') are accepted.
        const in_upper = @as(BoolVec, @bitCast(chars >= upper_a)) & @as(BoolVec, @bitCast(chars <= upper_z));
        const in_lower = @as(BoolVec, @bitCast(chars >= lower_a)) & @as(BoolVec, @bitCast(chars <= lower_z));
        const in_digit = @as(BoolVec, @bitCast(chars >= digit_0)) & @as(BoolVec, @bitCast(chars <= digit_9));
        const is_underscore = @as(BoolVec, @bitCast(chars == underscore));

        const merged = @as(@Vector(16, bool), @bitCast(in_upper | in_lower | in_digit | is_underscore));
        if (!@reduce(.And, merged)) return false;
    }

    // Fewer than 16 bytes remain; the first character was already validated above.
    return isValidVarNameSlowAscii(var_name[i..]);
}

/// Scalar check that every byte of `var_name` is in `[a-zA-Z0-9_]`.
/// Does not apply the "no leading digit" rule and accepts the empty string;
/// callers handle the first character themselves.
fn isValidVarNameSlowAscii(var_name: []const u8) bool {
    for (var_name) |c| {
        switch (c) {
            '0'...'9', 'a'...'z', 'A'...'Z', '_' => {},
            else => return false,
        }
    }
    return true;
}
|
|
|
|
/// Mutex intended to serialise writes to stderr.
/// NOTE(review): within this part of the file it is only referenced from
/// commented-out code in `closefd` — confirm whether it is still needed.
var stderr_mutex = std.Thread.Mutex{};
|
|
/// Close `fd` on a best-effort basis. A failure reported by
/// `Syscall.close2` is only logged; it is never propagated to the caller.
pub fn closefd(fd: bun.FileDescriptor) void {
    const failure = Syscall.close2(fd);
    if (failure == null) return;

    log("ERR closefd: {d}\n", .{fd});
}
|
|
|
|
/// Return the byte offset of the first `=` in `str`, or null if there is none.
///
/// All-ASCII input of 16 bytes or more is scanned 16 bytes at a time with a
/// vector comparison; other input is walked codepoint by codepoint.
pub fn hasEqSign(str: []const u8) ?u32 {
    if (!isAllAscii(str)) {
        // TODO actually i think that this can also use the simd stuff
        var iter = CodepointIterator.init(str);
        var cursor = CodepointIterator.Cursor{};
        while (iter.next(&cursor)) {
            if (cursor.c == '=') {
                return @intCast(cursor.i);
            }
        }
        return null;
    }

    if (str.len < 16)
        return hasEqSignAsciiSlow(str);

    const needles: @Vector(16, u8) = @splat('=');

    var offset: u32 = 0;
    while (offset + 16 <= str.len) : (offset += 16) {
        const haystack: @Vector(16, u8) = str[offset..][0..16].*;
        if (std.simd.firstTrue(haystack == needles)) |lane| {
            return @intCast(offset + lane);
        }
    }

    // Scan the tail that did not fill a whole 16-byte chunk.
    const tail_pos = hasEqSignAsciiSlow(str[offset..]) orelse return null;
    return offset + tail_pos;
}
|
|
|
|
/// Byte-wise search for the first `=` in `str`. Returns its index, or `null`
/// when `str` contains no `=`.
pub fn hasEqSignAsciiSlow(str: []const u8) ?u32 {
    const pos = std.mem.indexOfScalar(u8, str, '=') orelse return null;
    return @intCast(pos);
}
|
|
|
|
/// Iterator over a `std.StringArrayHashMap([:0]const u8)` of environment
/// variables that yields each entry as a pair of formattable `Key` / `Value`
/// wrappers. The map is borrowed, not owned.
pub const CmdEnvIter = struct {
    env: *const std.StringArrayHashMap([:0]const u8),
    iter: std.StringArrayHashMap([:0]const u8).Iterator,

    /// One key/value pair produced by `next`.
    const Entry = struct {
        key: Key,
        value: Value,
    };

    /// Wrapper around a NUL-terminated value so it can be printed with `{}`.
    const Value = struct {
        val: [:0]const u8,

        /// Writes the raw value bytes; format string and options are ignored.
        pub fn format(self: Value, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
            return writer.writeAll(self.val);
        }
    };

    /// Wrapper around a variable name so it can be printed with `{}`.
    const Key = struct {
        val: []const u8,

        /// Writes the raw key bytes; format string and options are ignored.
        pub fn format(self: Key, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
            return writer.writeAll(self.val);
        }

        /// Compares the key against a comptime-known string.
        pub fn eqlComptime(this: Key, comptime str: []const u8) bool {
            return bun.strings.eqlComptime(this.val, str);
        }
    };

    /// Creates an iterator positioned at the start of `env`.
    pub fn fromEnv(env: *const std.StringArrayHashMap([:0]const u8)) CmdEnvIter {
        return CmdEnvIter{
            .env = env,
            .iter = env.iterator(),
        };
    }

    /// Number of entries in the underlying map (not the number remaining).
    pub fn len(self: *const CmdEnvIter) usize {
        return self.env.count();
    }

    /// Returns the next entry, or `null` once the map is exhausted.
    pub fn next(self: *CmdEnvIter) !?Entry {
        if (self.iter.next()) |kv| {
            return Entry{
                .key = Key{ .val = kv.key_ptr.* },
                .value = Value{ .val = kv.value_ptr.* },
            };
        }
        return null;
    }
};
|
|
|
|
/// Tagged union with no variants declared.
/// NOTE(review): looks like an unused placeholder — confirm before removing.
const ExpansionStr = union(enum) {};
|
|
|
|
/// Helpers for testing the lexer.
pub const Test = struct {
    /// Counterpart of the lexer's `Token` that shares the `TokenTag` tag type.
    /// `Var` and `Text` hold the token's text itself (sliced out of the
    /// source buffer by `from_real`) instead of start/end offsets.
    pub const TestToken = union(TokenTag) {
        // |
        Pipe,
        // ||
        DoublePipe,
        // &
        Ampersand,
        // &&
        DoubleAmpersand,

        // >
        Redirect: AST.Cmd.RedirectFlags,

        // $
        Dollar,
        // *
        Asterisk,
        DoubleAsterisk,
        // =
        Eq,
        Semicolon,
        Newline,

        BraceBegin,
        Comma,
        BraceEnd,
        CmdSubstBegin,
        CmdSubstQuoted,
        CmdSubstEnd,
        OpenParen,
        CloseParen,

        Var: []const u8,
        Text: []const u8,
        JSObjRef: u32,

        Delimit,
        Eof,

        /// Converts a lexer `Token` into a `TestToken`. `buf` is the buffer
        /// that the `start..end` ranges of `Var` / `Text` tokens index into.
        pub fn from_real(the_token: Token, buf: []const u8) TestToken {
            return switch (the_token) {
                .Pipe => .Pipe,
                .DoublePipe => .DoublePipe,
                .Ampersand => .Ampersand,
                .DoubleAmpersand => .DoubleAmpersand,
                .Redirect => |flags| .{ .Redirect = flags },
                .Dollar => .Dollar,
                .Asterisk => .Asterisk,
                .DoubleAsterisk => .DoubleAsterisk,
                .Eq => .Eq,
                .Semicolon => .Semicolon,
                .Newline => .Newline,
                .BraceBegin => .BraceBegin,
                .Comma => .Comma,
                .BraceEnd => .BraceEnd,
                .CmdSubstBegin => .CmdSubstBegin,
                .CmdSubstQuoted => .CmdSubstQuoted,
                .CmdSubstEnd => .CmdSubstEnd,
                .OpenParen => .OpenParen,
                .CloseParen => .CloseParen,
                .Var => |range| .{ .Var = buf[range.start..range.end] },
                .Text => |range| .{ .Text = buf[range.start..range.end] },
                .JSObjRef => |ref_idx| .{ .JSObjRef = ref_idx },
                .Delimit => .Delimit,
                .Eof => .Eof,
            };
        }
    };
};
|
|
|
|
/// Builds shell script source from the pieces of a template call.
///
/// Each element of `string_args` is appended to `out_script` verbatim (no
/// escaping). After every element except the last, the matching entry of
/// `template_args` is rendered through `handleTemplateValue`, which may
/// record JS objects in `out_jsobjs`.
///
/// Returns `true` on success. Returns `false` when a piece could not be
/// rendered; for the failures detected here a JS exception has been thrown on
/// `globalThis`. Allocation failures are returned as errors.
pub fn shellCmdFromJS(
    globalThis: *JSC.JSGlobalObject,
    string_args: JSValue,
    template_args: []const JSValue,
    out_jsobjs: *std.ArrayList(JSValue),
    out_script: *std.ArrayList(u8),
) !bool {
    // Scratch space for formatting object-reference placeholders.
    var jsobjref_buf: [128]u8 = [_]u8{0} ** 128;

    var string_iter = string_args.arrayIterator(globalThis);
    var i: u32 = 0;
    // Index of the final string piece; no template value follows it.
    const last = string_iter.len -| 1;
    while (string_iter.next()) |js_value| {
        defer i += 1;
        if (!try appendJSValueStr(globalThis, js_value, out_script, false)) {
            globalThis.throw("Shell script string contains invalid UTF-16", .{});
            return false;
        }
        if (i < last) {
            // `string_args` and `template_args` arrive independently, so a
            // caller can hand over more string pieces than values. Report
            // that instead of indexing past the end of `template_args`.
            if (i >= template_args.len) {
                globalThis.throw("Shell script is missing JSValue arg", .{});
                return false;
            }
            const template_value = template_args[i];
            if (!(try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsobjref_buf[0..]))) return false;
        }
    }
    return true;
}
|
|
|
|
/// Protects `value`, records it in `out_jsobjs`, and appends a placeholder
/// (`LEX_JS_OBJREF_PREFIX` followed by the value's index in `out_jsobjs`) to
/// `out_script`. `jsobjref_buf` is scratch space for formatting it.
fn appendJSObjRefPlaceholder(
    value: JSValue,
    out_jsobjs: *std.ArrayList(JSValue),
    out_script: *std.ArrayList(u8),
    jsobjref_buf: []u8,
) !void {
    const idx = out_jsobjs.items.len;
    value.protect();
    try out_jsobjs.append(value);
    const placeholder = try std.fmt.bufPrint(jsobjref_buf, "{s}{d}", .{ LEX_JS_OBJREF_PREFIX, idx });
    try out_script.appendSlice(placeholder);
}

/// Appends the string form of `value` to `out_script` with escaping enabled.
/// On rejection a JS exception is thrown and `false` is returned.
fn appendEscapedTemplateValue(
    globalThis: *JSC.JSGlobalObject,
    value: JSValue,
    out_script: *std.ArrayList(u8),
) !bool {
    if (try appendJSValueStr(globalThis, value, out_script, true)) return true;
    globalThis.throw("Shell script string contains invalid UTF-16", .{});
    return false;
}

/// Renders one interpolated template value into `out_script`.
///
/// The checks run in this order and the first match wins:
///   1. empty value: nothing is appended;
///   2. array buffer: recorded as a JS object reference;
///   3. `Blob`: when its store is a file with a path, the path is appended
///      as escaped text; otherwise recorded as a JS object reference;
///   4. `ReadableStream` and `Response`: recorded as a JS object reference;
///   5. string: appended with escaping;
///   6. array: every element is rendered recursively, separated by one space;
///   7. object with a truthy `raw` property: that property is stringified and
///      appended without escaping;
///   8. primitive, or object that implements `toString`: appended with
///      escaping;
///   9. anything else: a JS exception is thrown.
///
/// "Recorded as a JS object reference" means the value is protected, pushed
/// onto `out_jsobjs`, and a placeholder naming its index is written to
/// `out_script`; `jsobjref_buf` is scratch space for that placeholder.
///
/// Returns `true` on success and `false` when the value was rejected.
/// Allocation and formatting failures are returned as errors.
pub fn handleTemplateValue(
    globalThis: *JSC.JSGlobalObject,
    template_value: JSValue,
    out_jsobjs: *std.ArrayList(JSValue),
    out_script: *std.ArrayList(u8),
    jsobjref_buf: []u8,
) !bool {
    if (template_value.isEmpty()) return true;

    if (template_value.asArrayBuffer(globalThis) != null) {
        try appendJSObjRefPlaceholder(template_value, out_jsobjs, out_script, jsobjref_buf);
        return true;
    }

    if (template_value.as(JSC.WebCore.Blob)) |blob| {
        if (blob.store) |store| {
            if (store.data == .file and store.data.file.pathlike == .path) {
                const path = store.data.file.pathlike.path.slice();
                if (!try appendUTF8Text(path, out_script, true)) {
                    globalThis.throw("Shell script string contains invalid UTF-16", .{});
                    return false;
                }
                return true;
            }
        }

        try appendJSObjRefPlaceholder(template_value, out_jsobjs, out_script, jsobjref_buf);
        return true;
    }

    if (JSC.WebCore.ReadableStream.fromJS(template_value, globalThis) != null) {
        try appendJSObjRefPlaceholder(template_value, out_jsobjs, out_script, jsobjref_buf);
        return true;
    }

    if (template_value.as(JSC.WebCore.Response) != null) {
        try appendJSObjRefPlaceholder(template_value, out_jsobjs, out_script, jsobjref_buf);
        return true;
    }

    if (template_value.isString()) {
        return appendEscapedTemplateValue(globalThis, template_value, out_script);
    }

    if (template_value.jsType().isArray()) {
        var array = template_value.arrayIterator(globalThis);
        const last = array.len -| 1;
        var i: u32 = 0;
        while (array.next()) |arr| : (i += 1) {
            if (!(try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsobjref_buf))) return false;
            // Separate elements with a single unescaped space.
            if (i < last) {
                const str = bun.String.init(" ");
                if (!try appendBunStr(str, out_script, false)) return false;
            }
        }
        return true;
    }

    if (template_value.isObject()) {
        if (template_value.getTruthy(globalThis, "raw")) |maybe_str| {
            const bunstr = maybe_str.toBunString(globalThis);
            defer bunstr.deref();
            // `raw` content is inserted as-is: escaping is disabled here.
            if (!try appendBunStr(bunstr, out_script, false)) {
                globalThis.throw("Shell script string contains invalid UTF-16", .{});
                return false;
            }
            return true;
        }
    }

    if (template_value.isPrimitive()) {
        return appendEscapedTemplateValue(globalThis, template_value, out_script);
    }

    if (template_value.implementsToString(globalThis)) {
        return appendEscapedTemplateValue(globalThis, template_value, out_script);
    }

    globalThis.throw("Invalid JS object used in shell: {}, you might need to call `.toString()` on it", .{template_value.fmtString(globalThis)});
    return false;
}
|
|
|
|
/// Converts `jsval` to a `bun.String` and appends it to `outbuf` through
/// `appendBunStr`, escaping it when `allow_escape` is set and the text needs
/// it. Returns `false` when `appendBunStr` rejects the string (this is meant
/// to disallow invalid surrogate pairs).
pub fn appendJSValueStr(globalThis: *JSC.JSGlobalObject, jsval: JSValue, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
    const as_bun_string = jsval.toBunString(globalThis);
    defer as_bun_string.deref();

    return appendBunStr(as_bun_string, outbuf, allow_escape);
}
|
|
|
|
/// Appends `slice` to `outbuf` after checking that it is valid UTF-8.
///
/// Returns `false` (appending nothing) when validation fails. When
/// `allow_escape` is set and `needsEscape` flags the text, the escaped form
/// produced by `escape` is appended instead of the raw bytes.
pub fn appendUTF8Text(slice: []const u8, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
    const is_valid = bun.simdutf.validate.utf8(slice);
    if (!is_valid) return false;

    if (comptime allow_escape) {
        if (needsEscape(slice)) {
            try escape(slice, outbuf);
            return true;
        }
    }

    try outbuf.appendSlice(slice);
    return true;
}
|
|
|
|
/// Appends the UTF-8 form of `bunstr` to `outbuf`.
///
/// Returns `false` (appending nothing) when the bytes fail UTF-8 validation.
/// When `allow_escape` is set and `needsEscape` flags the text, the escaped
/// form produced by `escape` is appended instead of the raw bytes.
pub fn appendBunStr(bunstr: bun.String, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
    const utf8 = bunstr.toUTF8WithoutRef(bun.default_allocator);
    defer utf8.deinit();
    const bytes = utf8.slice();

    // TODO: toUTF8 already validates. We shouldn't have to do this twice!
    // Only a slice that was NOT freshly allocated is validated here.
    // NOTE(review): this flag was previously named `is_ascii` although it
    // holds `isAllocated()`; presumably an allocated slice is the output of a
    // conversion that already yields valid UTF-8 — confirm.
    const skip_validation = utf8.isAllocated();
    if (!skip_validation) {
        if (!bun.simdutf.validate.utf8(bytes)) return false;
    }

    if (comptime allow_escape) {
        if (needsEscape(bytes)) {
            try escape(bytes, outbuf);
            return true;
        }
    }

    try outbuf.appendSlice(bytes);
    return true;
}
|
|
|
|
/// Characters that need to be escaped
|
|
/// `needsEscape` and `needsEscapeSlow` report `true` as soon as a string
/// contains any one of these bytes.
const SPECIAL_CHARS = [_]u8{ '$', '`', '"', '\\', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', ' ' };
|
|
/// Characters that need to be backslashed inside double quotes
|
|
/// `escape` and `escapeUnicode` emit a backslash in front of each of these
/// bytes when wrapping a string in double quotes.
const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };
|
|
|
|
/// Appends `str` to `outbuf` wrapped in double quotes, putting a backslash in
/// front of every byte listed in `BACKSLASHABLE_CHARS`. All other bytes are
/// copied through unchanged.
///
/// assumes WTF-8
pub fn escape(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
    try outbuf.ensureUnusedCapacity(str.len);

    try outbuf.append('"');

    for (str) |byte| {
        const must_backslash = std.mem.indexOfScalar(u8, &BACKSLASHABLE_CHARS, byte) != null;
        if (must_backslash) {
            try outbuf.appendSlice(&[_]u8{ '\\', byte });
        } else {
            try outbuf.append(byte);
        }
    }

    try outbuf.append('"');
}
|
|
|
|
/// Appends `str` to `outbuf` wrapped in double quotes, putting a backslash in
/// front of every byte listed in `BACKSLASHABLE_CHARS`.
///
/// Every output character, including the quotes and backslashes, goes through
/// `bun.strings.encodeWTF8Rune`. Each input byte is passed to the encoder as
/// a code point of that numeric value.
/// NOTE(review): for bytes >= 0x80 this treats the input as one code point
/// per byte rather than as UTF-8 — confirm that is the intended input
/// encoding.
pub fn escapeUnicode(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
    try outbuf.ensureUnusedCapacity(str.len);

    // Room for two encoded runes back to back (backslash + escaped char).
    var bytes: [8]u8 = undefined;
    var n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
    try outbuf.appendSlice(bytes[0..n]);

    loop: for (str) |c| {
        inline for (BACKSLASHABLE_CHARS) |spc| {
            if (spc == c) {
                n = bun.strings.encodeWTF8Rune(bytes[0..4], '\\');
                // Encode the escaped character in place, right after the
                // backslash. This must be a pointer into `bytes`:
                // dereferencing the sub-array would copy it, and the encoded
                // character would never reach `bytes[0..n]`.
                const after_backslash: *[4]u8 = bytes[n..][0..4];
                n += bun.strings.encodeWTF8Rune(after_backslash, @intCast(c));
                try outbuf.appendSlice(bytes[0..n]);
                continue :loop;
            }
        }
        n = bun.strings.encodeWTF8Rune(bytes[0..4], @intCast(c));
        try outbuf.appendSlice(bytes[0..n]);
    }

    n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
    try outbuf.appendSlice(bytes[0..n]);
}
|
|
|
|
/// UTF-16 counterpart of `needsEscape`: returns `true` when any code unit of
/// `str` is one of the shell-special characters, `false` otherwise (including
/// for an empty slice).
///
/// The character list must stay in sync with `SPECIAL_CHARS`. The backtick is
/// part of that set, and leaving it out would let text containing a backtick
/// through unescaped.
pub fn needsEscapeUTF16(str: []const u16) bool {
    for (str) |char| {
        switch (char) {
            '$', '`', '"', '\\', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', ' ' => return true,
            else => {},
        }
    }

    return false;
}
|
|
|
|
/// Reports whether `str` contains at least one byte from `SPECIAL_CHARS`.
///
/// A `true` result only means the string *might* need escaping: false
/// positives are possible, but this is faster than running the shell lexer
/// over the input for a more correct answer.
///
/// Inputs shorter than 128 bytes go straight to `needsEscapeSlow`. Longer
/// inputs are compared 16 bytes at a time against each special character,
/// with `needsEscapeSlow` covering the remaining tail.
pub fn needsEscape(str: []const u8) bool {
    if (str.len < 128) return needsEscapeSlow(str);

    const Chunk = @Vector(16, u8);

    // One splatted vector per special character, built at compile time.
    const special_lanes = comptime blk: {
        var lanes: [SPECIAL_CHARS.len]Chunk = undefined;
        for (&lanes, SPECIAL_CHARS) |*lane, special| {
            lane.* = @splat(special);
        }
        break :blk lanes;
    };

    var pos: usize = 0;
    while (str.len - pos >= 16) : (pos += 16) {
        const chunk: Chunk = str[pos..][0..16].*;

        inline for (special_lanes) |lane| {
            if (@reduce(.Or, chunk == lane)) return true;
        }
    }

    return pos < str.len and needsEscapeSlow(str[pos..]);
}
|
|
|
|
/// Scalar scan: returns `true` when any byte of `str` is one of
/// `SPECIAL_CHARS`, and `false` otherwise (including for an empty slice).
pub fn needsEscapeSlow(str: []const u8) bool {
    return std.mem.indexOfAny(u8, str, &SPECIAL_CHARS) != null;
}
|