Merge branch 'main' into jarred/process-change

2026-02-17 22:32:06 +00:00 · 2024-02-17 02:44:16 -08:00
parent 8d5e528aa8 c34bbb2e3f
commit c8a5fc3dcf
61 changed files with 4545 additions and 1753 deletions
--- a/src/shell/interpreter.zig
+++ b/src/shell/interpreter.zig
@@ -206,7 +206,7 @@ pub const EnvStr = packed struct {
    tag: Tag,
    len: usize = 0,

-    const print = bun.Output.scoped(.EnvStr, false);
+    const print = bun.Output.scoped(.EnvStr, true);

    const Tag = enum(u16) {
        /// Dealloced by reference counting
@@ -841,9 +841,20 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            };

            const template_args = callframe.argumentsPtr()[1..callframe.argumentsCount()];
+            var stack_alloc = std.heap.stackFallback(@sizeOf(bun.String) * 4, arena.allocator());
+            var jsstrings = std.ArrayList(bun.String).initCapacity(stack_alloc.get(), 4) catch {
+                globalThis.throwOutOfMemory();
+                return null;
+            };
+            defer {
+                for (jsstrings.items[0..]) |bunstr| {
+                    bunstr.deref();
+                }
+                jsstrings.deinit();
+            }
            var jsobjs = std.ArrayList(JSValue).init(arena.allocator());
            var script = std.ArrayList(u8).init(arena.allocator());
-            if (!(bun.shell.shellCmdFromJS(globalThis, string_args, template_args, &jsobjs, &script) catch {
+            if (!(bun.shell.shellCmdFromJS(globalThis, string_args, template_args, &jsobjs, &jsstrings, &script) catch {
                globalThis.throwOutOfMemory();
                return null;
            })) {
@@ -856,6 +867,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                &arena,
                script.items[0..],
                jsobjs.items[0..],
+                jsstrings.items[0..],
                &parser,
                &lex_result,
            ) catch |err| {
@@ -901,14 +913,21 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            return interpreter;
        }

-        pub fn parse(arena: *bun.ArenaAllocator, script: []const u8, jsobjs: []JSValue, out_parser: *?bun.shell.Parser, out_lex_result: *?shell.LexResult) !ast.Script {
+        pub fn parse(
+            arena: *bun.ArenaAllocator,
+            script: []const u8,
+            jsobjs: []JSValue,
+            jsstrings_to_escape: []bun.String,
+            out_parser: *?bun.shell.Parser,
+            out_lex_result: *?shell.LexResult,
+        ) !ast.Script {
            const lex_result = brk: {
                if (bun.strings.isAllASCII(script)) {
-                    var lexer = bun.shell.LexerAscii.new(arena.allocator(), script);
+                    var lexer = bun.shell.LexerAscii.new(arena.allocator(), script, jsstrings_to_escape);
                    try lexer.lex();
                    break :brk lexer.get_result();
                }
-                var lexer = bun.shell.LexerUnicode.new(arena.allocator(), script);
+                var lexer = bun.shell.LexerUnicode.new(arena.allocator(), script, jsstrings_to_escape);
                try lexer.lex();
                break :brk lexer.get_result();
            };
@@ -1028,7 +1047,14 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            const jsobjs: []JSValue = &[_]JSValue{};
            var out_parser: ?bun.shell.Parser = null;
            var out_lex_result: ?bun.shell.LexResult = null;
-            const script = ThisInterpreter.parse(&arena, src, jsobjs, &out_parser, &out_lex_result) catch |err| {
+            const script = ThisInterpreter.parse(
+                &arena,
+                src,
+                jsobjs,
+                &[_]bun.String{},
+                &out_parser,
+                &out_lex_result,
+            ) catch |err| {
                if (err == bun.shell.ParseError.Lex) {
                    std.debug.assert(out_lex_result != null);
                    const str = out_lex_result.?.combineErrors(arena.allocator());
@@ -1074,7 +1100,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            const jsobjs: []JSValue = &[_]JSValue{};
            var out_parser: ?bun.shell.Parser = null;
            var out_lex_result: ?bun.shell.LexResult = null;
-            const script = ThisInterpreter.parse(&arena, src, jsobjs, &out_parser, &out_lex_result) catch |err| {
+            const script = ThisInterpreter.parse(&arena, src, jsobjs, &[_]bun.String{}, &out_parser, &out_lex_result) catch |err| {
                if (err == bun.shell.ParseError.Lex) {
                    std.debug.assert(out_lex_result != null);
                    const str = out_lex_result.?.combineErrors(arena.allocator());
@@ -1156,6 +1182,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {

        fn finish(this: *ThisInterpreter, exit_code: ExitCode) void {
            log("finish", .{});
+            defer decrPendingActivityFlag(&this.has_pending_activity);
            if (comptime EventLoopKind == .js) {
                // defer this.deinit();
                // this.promise.resolve(this.global, JSValue.jsNumberFromInt32(@intCast(exit_code)));
@@ -1169,6 +1196,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {

        fn errored(this: *ThisInterpreter, the_error: ShellError) void {
            _ = the_error; // autofix
+            defer decrPendingActivityFlag(&this.has_pending_activity);

            if (comptime EventLoopKind == .js) {
                // defer this.deinit();
@@ -1319,6 +1347,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
        pub fn finalize(
            this: *ThisInterpreter,
        ) callconv(.C) void {
+            log("Interpreter finalize", .{});
            this.deinit();
        }

@@ -1360,12 +1389,12 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {

            word_idx: u32,
            current_out: std.ArrayList(u8),
-            state: enum {
+            state: union(enum) {
                normal,
                braces,
                glob,
                done,
-                err,
+                err: bun.shell.ShellErr,
            },
            child_state: union(enum) {
                idle,
@@ -1582,6 +1611,12 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                    this.parent.childDone(this, 0);
                    return;
                }
+
+                // Parent will inspect the `this.state.err`
+                if (this.state == .err) {
+                    this.parent.childDone(this, 1);
+                    return;
+                }
            }

            fn transitionToGlobState(this: *Expansion) void {
@@ -1589,10 +1624,23 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                this.child_state = .{ .glob = .{ .walker = .{} } };
                const pattern = this.current_out.items[0..];

-                switch (GlobWalker.init(&this.child_state.glob.walker, &arena, pattern, false, false, false, false, false) catch bun.outOfMemory()) {
+                const cwd = this.base.shell.cwd();
+
+                switch (GlobWalker.initWithCwd(
+                    &this.child_state.glob.walker,
+                    &arena,
+                    pattern,
+                    cwd,
+                    false,
+                    false,
+                    false,
+                    false,
+                    false,
+                ) catch bun.outOfMemory()) {
                    .result => {},
                    .err => |e| {
-                        global_handle.get().actuallyThrow(bun.shell.ShellErr.newSys(e));
+                        this.state = .{ .err = bun.shell.ShellErr.newSys(e) };
+                        this.next();
                        return;
                    },
                }
@@ -1803,6 +1851,19 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                    }
                }

+                if (task.result.items.len == 0) {
+                    const msg = std.fmt.allocPrint(bun.default_allocator, "no matches found: {s}", .{this.child_state.glob.walker.pattern}) catch bun.outOfMemory();
+                    this.state = .{
+                        .err = bun.shell.ShellErr{
+                            .custom = msg,
+                        },
+                    };
+                    this.child_state.glob.walker.deinit(true);
+                    this.child_state = .idle;
+                    this.next();
+                    return;
+                }
+
                for (task.result.items) |sentinel_str| {
                    // The string is allocated in the glob walker arena and will be freed, so needs to be duped here
                    const duped = this.base.interpreter.allocator.dupeZ(u8, sentinel_str[0..sentinel_str.len]) catch bun.outOfMemory();
@@ -2172,10 +2233,13 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                }

                this.base.shell.deinit();
+                bun.default_allocator.destroy(this);
            }

            pub fn deinitFromInterpreter(this: *Script) void {
-                this.base.shell.deinitImpl(false, false);
+                // Let the interpreter deinitialize the shell state
+                // this.base.shell.deinitImpl(false, false);
+                bun.default_allocator.destroy(this);
            }
        };

@@ -2193,6 +2257,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                    current_expansion_result: std.ArrayList([:0]const u8),
                    expansion: Expansion,
                },
+                err: bun.shell.ShellErr,
                done,
            },
            ctx: AssignCtx,
@@ -2265,6 +2330,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                            return;
                        },
                        .done => unreachable,
+                        .err => return this.parent.childDone(this, 1),
                    }
                }

@@ -2272,9 +2338,14 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            }

            pub fn childDone(this: *Assigns, child: ChildPtr, exit_code: ExitCode) void {
-                _ = exit_code;
-
                if (child.ptr.is(Expansion)) {
+                    const expansion = child.ptr.as(Expansion);
+                    if (exit_code != 0) {
+                        this.state = .{
+                            .err = expansion.state.err,
+                        };
+                        return;
+                    }
                    var expanding = &this.state.expanding;

                    const label = this.node[expanding.idx].label;
@@ -3182,9 +3253,16 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
            }

            pub fn childDone(this: *Cmd, child: ChildPtr, exit_code: ExitCode) void {
-                _ = exit_code; // autofix
-
                if (child.ptr.is(Assigns)) {
+                    if (exit_code != 0) {
+                        const err = this.state.expanding_assigns.state.err;
+                        defer err.deinit(bun.default_allocator);
+                        this.state.expanding_assigns.deinit();
+                        const buf = err.fmt();
+                        this.writeFailingError(buf, exit_code);
+                        return;
+                    }
+
                    this.state.expanding_assigns.deinit();
                    this.state = .{
                        .expanding_redirect = .{
@@ -3196,6 +3274,18 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                }

                if (child.ptr.is(Expansion)) {
+                    child.deinit();
+                    if (exit_code != 0) {
+                        const err = switch (this.state) {
+                            .expanding_redirect => this.state.expanding_redirect.expansion.state.err,
+                            .expanding_args => this.state.expanding_args.expansion.state.err,
+                            else => @panic("Invalid state"),
+                        };
+                        defer err.deinit(bun.default_allocator);
+                        const buf = err.fmt();
+                        this.writeFailingError(buf, exit_code);
+                        return;
+                    }
                    this.next();
                    return;
                }
@@ -3537,7 +3627,10 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
                    this.exec = .none;
                }

-                if (!this.spawn_arena_freed) this.spawn_arena.deinit();
+                if (!this.spawn_arena_freed) {
+                    log("Spawn arena free", .{});
+                    this.spawn_arena.deinit();
+                }
                this.freed = true;
                this.base.interpreter.allocator.destroy(this);
            }
--- a/src/shell/shell.zig
+++ b/src/shell/shell.zig
@@ -47,6 +47,27 @@ pub const ShellErr = union(enum) {
        };
    }

+    /// Renders this error as a single human-readable line of the form `bun: ...\n`.
+    ///
+    /// The returned slice is allocated with `bun.default_allocator` and is not freed here.
+    /// Allocation failure is routed to `bun.outOfMemory()`.
+    pub fn fmt(this: @This()) []const u8 {
+        return switch (this) {
+            .sys => |err| std.fmt.allocPrint(bun.default_allocator, "bun: {s}: {}\n", .{ err.message, err.path }) catch bun.outOfMemory(),
+            .custom => |msg| std.fmt.allocPrint(bun.default_allocator, "bun: {s}\n", .{msg}) catch bun.outOfMemory(),
+            .invalid_arguments => |args| std.fmt.allocPrint(bun.default_allocator, "bun: invalid arguments: {s}\n", .{args.val}) catch bun.outOfMemory(),
+            // Print the payload of the active `.todo` variant. Reading
+            // `this.invalid_arguments` here would access an inactive union field.
+            // NOTE(review): assumes the `.todo` payload is a string — confirm against the union declaration.
+            .todo => |msg| std.fmt.allocPrint(bun.default_allocator, "bun: TODO: {s}\n", .{msg}) catch bun.outOfMemory(),
+        };
+    }
+
    pub fn throwJS(this: @This(), globalThis: *JSC.JSGlobalObject) void {
        switch (this) {
            .sys => {
@@ -922,7 +943,7 @@ pub const Parser = struct {
                        self.continue_from_subparser(&subparser);
                        if (self.delimits(self.peek())) {
                            _ = self.match(.Delimit);
-                            if (should_break) break;
+                            break;
                        }
                    },
                    .Text => |txtrng| {
@@ -1279,7 +1300,8 @@ pub const LexError = struct {
    /// Allocated with lexer arena
    msg: []const u8,
 };
-pub const LEX_JS_OBJREF_PREFIX = "$__bun_";
+pub const LEX_JS_OBJREF_PREFIX = "~__bun_";
+pub const LEX_JS_STRING_PREFIX = "~__bunstr_";

 pub fn NewLexer(comptime encoding: StringEncoding) type {
    const Chars = ShellCharIter(encoding);
@@ -1300,6 +1322,10 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
        in_subshell: ?SubShellKind = null,
        errors: std.ArrayList(LexError),

+        /// Contains a list of strings we need to escape
+        /// Not owned by this struct
+        string_refs: []bun.String,
+
        const SubShellKind = enum {
            /// (echo hi; echo hello)
            normal,
@@ -1329,12 +1355,13 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
            delimit_quote: bool,
        };

-        pub fn new(alloc: Allocator, src: []const u8) @This() {
+        pub fn new(alloc: Allocator, src: []const u8, strings_to_escape: []bun.String) @This() {
            return .{
                .chars = Chars.init(src),
                .tokens = ArrayList(Token).init(alloc),
                .strpool = ArrayList(u8).init(alloc),
                .errors = ArrayList(LexError).init(alloc),
+                .string_refs = strings_to_escape,
            };
        }

@@ -1364,6 +1391,7 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                .word_start = self.word_start,
                .j = self.j,
+                .string_refs = self.string_refs,
            };
            sublexer.chars.state = .Normal;
            return sublexer;
@@ -1411,11 +1439,31 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                const char = input.char;
                const escaped = input.escaped;

+                // Special token to denote substituted JS variables
+                if (char == '~') {
+                    if (self.looksLikeJSStringRef()) {
+                        if (self.eatJSStringRef()) |bunstr| {
+                            try self.break_word(false);
+                            try self.handleJSStringRef(bunstr);
+                            continue;
+                        }
+                    } else if (self.looksLikeJSObjRef()) {
+                        if (self.eatJSObjRef()) |tok| {
+                            if (self.chars.state == .Double) {
+                                self.add_error("JS object reference not allowed in double quotes");
+                                return;
+                            }
+                            try self.break_word(false);
+                            try self.tokens.append(tok);
+                            continue;
+                        }
+                    }
+                }
                // Handle non-escaped chars:
                // 1. special syntax (operators, etc.)
                // 2. lexing state switchers (quotes)
                // 3. word breakers (spaces, etc.)
-                if (!escaped) escaped: {
+                else if (!escaped) escaped: {
                    switch (char) {
                        '#' => {
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
@@ -1506,21 +1554,13 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            // const snapshot = self.make_snapshot();
                            // Handle variable
                            try self.break_word(false);
-                            if (self.eat_js_obj_ref()) |ref| {
-                                if (self.chars.state == .Double) {
-                                    try self.errors.append(.{ .msg = bun.default_allocator.dupe(u8, "JS object reference not allowed in double quotes") catch bun.outOfMemory() });
-                                    return;
-                                }
-                                try self.tokens.append(ref);
+                            const var_tok = try self.eat_var();
+                            // empty var
+                            if (var_tok.start == var_tok.end) {
+                                try self.appendCharToStrPool('$');
+                                try self.break_word(false);
                            } else {
-                                const var_tok = try self.eat_var();
-                                // empty var
-                                if (var_tok.start == var_tok.end) {
-                                    try self.appendCharToStrPool('$');
-                                    try self.break_word(false);
-                                } else {
-                                    try self.tokens.append(.{ .Var = var_tok });
-                                }
+                                try self.tokens.append(.{ .Var = var_tok });
                            }
                            self.word_start = self.j;
                            continue;
@@ -1778,6 +1818,9 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                        switch (char) {
                            '0'...'9' => {
                                _ = self.eat();
+                                if (count >= 32) {
+                                    return null;
+                                }
                                buf[count] = @intCast(char);
                                count += 1;
                                continue;
@@ -1863,6 +1906,7 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                const char = result.char;
                switch (char) {
                    '0'...'9' => {
+                        if (count >= 32) return null;
                        // Safe to cast here because 0-9 is in ASCII range
                        buf[count] = @intCast(char);
                        count += 1;
@@ -1907,19 +1951,146 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
            self.continue_from_sublexer(&sublexer);
        }

-        fn eat_js_obj_ref(self: *@This()) ?Token {
-            const snap = self.make_snapshot();
-            if (self.eat_literal(u8, LEX_JS_OBJREF_PREFIX)) {
-                if (self.eat_number_word()) |num| {
-                    if (num <= std.math.maxInt(u32)) {
-                        return .{ .JSObjRef = @intCast(num) };
+        /// Appends the contents of `bunstr` to the string pool and advances `j` by the
+        /// number of bytes that were added.
+        ///
+        /// 8-bit and UTF-8 strings are copied byte-for-byte; any other string is read as
+        /// UTF-16 and converted to UTF-8 while being appended.
+        fn appendStringToStrPool(self: *@This(), bunstr: bun.String) !void {
+            const len_before = self.strpool.items.len;
+            if (!(bunstr.is8Bit() or bunstr.isUTF8())) {
+                // Reserve the converted size up front, then convert straight into the pool.
+                const wide = bunstr.utf16();
+                const utf8_len = bun.simdutf.simdutf__utf8_length_from_utf16le(wide.ptr, wide.len);
+                try self.strpool.ensureUnusedCapacity(utf8_len);
+                try bun.strings.convertUTF16ToUTF8Append(&self.strpool, wide);
+            } else {
+                try self.strpool.appendSlice(bunstr.byteSlice());
+            }
+            self.j += @intCast(self.strpool.items.len - len_before);
+        }
+
+        /// Handles a substituted JS string by appending its contents to the string pool.
+        fn handleJSStringRef(self: *@This(), bunstr: bun.String) !void {
+            return self.appendStringToStrPool(bunstr);
+        }
+
+        /// Reports whether the bytes at the cursor start with `LEX_JS_OBJREF_PREFIX` minus
+        /// its first character and are followed by at least one more byte.
+        fn looksLikeJSObjRef(self: *@This()) bool {
+            // Only the part after the prefix's first character is compared.
+            const tail = LEX_JS_OBJREF_PREFIX[1..];
+            const rest = self.chars.srcBytesAtCursor();
+            return rest.len > tail.len and std.mem.eql(u8, rest[0..tail.len], tail);
+        }
+
+        /// Reports whether the bytes at the cursor start with `LEX_JS_STRING_PREFIX` minus
+        /// its first character and are followed by at least one more byte.
+        fn looksLikeJSStringRef(self: *@This()) bool {
+            // Only the part after the prefix's first character is compared.
+            const tail = LEX_JS_STRING_PREFIX[1..];
+            const rest = self.chars.srcBytesAtCursor();
+            return rest.len > tail.len and std.mem.eql(u8, rest[0..tail.len], tail);
+        }
+
+        /// Parses the decimal index that follows a JS substitution marker at the cursor.
+        ///
+        /// `literal` is the full marker text; only `literal[1..]` is compared against the
+        /// source bytes at the cursor. `name` is spliced into error messages. `validate`
+        /// is called with the parsed index and may reject it.
+        ///
+        /// On success the cursor is advanced past the digits and the index is returned.
+        /// Returns null without recording an error when the marker is not present, and
+        /// null after calling `add_error` when no digits follow, when more than 32 digits
+        /// follow, or when the digits do not parse as `usize`. Also returns null when
+        /// `validate` returns false.
+        fn eatJSSubstitutionIdx(self: *@This(), comptime literal: []const u8, comptime name: []const u8, comptime validate: *const fn (*@This(), usize) bool) ?usize {
+            const bytes = self.chars.srcBytesAtCursor();
+            if (literal.len - 1 >= bytes.len) return null;
+            if (std.mem.eql(u8, bytes[0 .. literal.len - 1], literal[1..])) {
+                var i: usize = 0;
+                var digit_buf: [32]u8 = undefined;
+                var digit_buf_count: u8 = 0;
+
+                i += literal.len - 1;
+
+                while (i < bytes.len) : (i += 1) {
+                    switch (bytes[i]) {
+                        '0'...'9' => {
+                            if (digit_buf_count >= digit_buf.len) {
+                                const ERROR_STR = "Invalid " ++ name ++ " (number too high): ";
+                                // The format below emits ERROR_STR, one separating space, every
+                                // buffered digit and the overflowing digit, so the buffer needs
+                                // ERROR_STR.len + 1 + digit_buf.len + 1 bytes; one byte less makes
+                                // bufPrint fail and hit the panic.
+                                // NOTE(review): error_buf is stack memory — assumes add_error copies the message.
+                                var error_buf: [ERROR_STR.len + 1 + digit_buf.len + 1]u8 = undefined;
+                                const error_msg = std.fmt.bufPrint(error_buf[0..], "{s} {s}{c}", .{ ERROR_STR, digit_buf[0..digit_buf_count], bytes[i] }) catch @panic("Should not happen");
+                                self.add_error(error_msg);
+                                return null;
+                            }
+                            digit_buf[digit_buf_count] = bytes[i];
+                            digit_buf_count += 1;
+                        },
+                        else => break,
                    }
                }
+
+                if (digit_buf_count == 0) {
+                    self.add_error("Invalid " ++ name ++ " (no idx)");
+                    return null;
+                }
+
+                const idx = std.fmt.parseInt(usize, digit_buf[0..digit_buf_count], 10) catch {
+                    self.add_error("Invalid " ++ name ++ " ref ");
+                    return null;
+                };
+
+                if (!validate(self, idx)) return null;
+
+                // Bump the cursor past the digits and make the last two digits the
+                // iterator's previous/current characters.
+                brk: {
+                    const new_idx = self.chars.cursorPos() + i;
+                    const prev_ascii_char: ?u7 = if (digit_buf_count == 1) null else @truncate(digit_buf[digit_buf_count - 2]);
+                    const cur_ascii_char: u7 = @truncate(digit_buf[digit_buf_count - 1]);
+                    if (comptime encoding == .ascii) {
+                        self.chars.src.i = new_idx;
+                        if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
+                        self.chars.current = .{ .char = cur_ascii_char };
+                        break :brk;
+                    }
+                    self.chars.src.cursor = CodepointIterator.Cursor{
+                        .i = @intCast(new_idx),
+                        .c = cur_ascii_char,
+                        .width = 1,
+                    };
+                    self.chars.src.next_cursor = self.chars.src.cursor;
+                    SrcUnicode.nextCursor(&self.chars.src.iter, &self.chars.src.next_cursor);
+                    if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
+                    self.chars.current = .{ .char = cur_ascii_char };
+                }
+
+                return idx;
            }
-            self.backtrack(snap);
            return null;
        }

+        /// Consumes a JS string reference marker at the cursor and returns the matching
+        /// entry of `string_refs`, or null when no valid reference could be consumed.
+        ///
+        /// __NOTE__: The returned bun.String is borrowed: its ref count is not incremented
+        /// here, so do not store references to it.
+        fn eatJSStringRef(self: *@This()) ?bun.String {
+            const idx = self.eatJSSubstitutionIdx(
+                LEX_JS_STRING_PREFIX,
+                "JS string ref",
+                validateJSStringRefIdx,
+            ) orelse return null;
+            return self.string_refs[idx];
+        }
+
+        /// Checks that `idx` addresses an existing entry of `string_refs`.
+        ///
+        /// Records a lexer error and returns false when the index is out of bounds.
+        fn validateJSStringRefIdx(self: *@This(), idx: usize) bool {
+            if (idx >= self.string_refs.len) {
+                // Closing parenthesis added so the message matches the object-ref variant.
+                self.add_error("Invalid JS string ref (out of bounds)");
+                return false;
+            }
+            return true;
+        }
+
+        /// Consumes a JS object reference marker at the cursor and returns it as a
+        /// `.JSObjRef` token, or null when no valid reference could be consumed.
+        fn eatJSObjRef(self: *@This()) ?Token {
+            const idx = self.eatJSSubstitutionIdx(
+                LEX_JS_OBJREF_PREFIX,
+                "JS object ref",
+                validateJSObjRefIdx,
+            ) orelse return null;
+            return .{ .JSObjRef = @intCast(idx) };
+        }
+
+        /// Checks that `idx` fits in the `u32` carried by a `.JSObjRef` token.
+        ///
+        /// Records a lexer error and returns false when the index is too large.
+        fn validateJSObjRefIdx(self: *@This(), idx: usize) bool {
+            // `maxInt(u32)` itself is representable, so only strictly larger values are rejected.
+            if (idx > std.math.maxInt(u32)) {
+                self.add_error("Invalid JS object ref (out of bounds)");
+                return false;
+            }
+            return true;
+        }
+
        fn eat_var(self: *@This()) !Token.TextRange {
            const start = self.j;
            var i: usize = 0;
@@ -2087,7 +2258,7 @@ const SrcUnicode = struct {

    inline fn indexNext(this: *const SrcUnicode) ?IndexValue {
        if (this.next_cursor.width + this.next_cursor.i > this.iter.bytes.len) return null;
-        return .{ .char = this.next_cursor.c, .width = this.next_cursor.width };
+        return .{ .char = @intCast(this.next_cursor.c), .width = this.next_cursor.width };
    }

    inline fn eat(this: *SrcUnicode, escaped: bool) void {
@@ -2147,6 +2318,27 @@ pub fn ShellCharIter(comptime encoding: StringEncoding) type {
            };
        }

+        /// Returns the complete source buffer backing this iterator, for either encoding.
+        pub fn srcBytes(self: *@This()) []const u8 {
+            return if (comptime encoding == .ascii) self.src.bytes else self.src.iter.bytes;
+        }
+
+        /// Returns the source bytes from the current cursor index to the end of the input,
+        /// or an empty slice when the cursor index is at or beyond the end.
+        pub fn srcBytesAtCursor(self: *@This()) []const u8 {
+            const all = self.srcBytes();
+            // The cursor index lives in a different field depending on the encoding.
+            const at: usize = if (comptime encoding == .ascii) self.src.i else self.src.iter.i;
+            return if (at < all.len) all[at..] else "";
+        }
+
+        /// Returns the current byte index of the cursor into the source buffer.
+        pub fn cursorPos(self: *@This()) usize {
+            return if (comptime encoding == .ascii) self.src.i else self.src.iter.i;
+        }
+
        pub fn eat(self: *@This()) ?InputChar {
            if (self.read_char()) |result| {
                self.prev = self.current;
@@ -2451,8 +2643,10 @@ pub fn shellCmdFromJS(
    string_args: JSValue,
    template_args: []const JSValue,
    out_jsobjs: *std.ArrayList(JSValue),
+    jsstrings: *std.ArrayList(bun.String),
    out_script: *std.ArrayList(u8),
 ) !bool {
+    var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
    var jsobjref_buf: [128]u8 = [_]u8{0} ** 128;

    var string_iter = string_args.arrayIterator(globalThis);
@@ -2460,7 +2654,7 @@ pub fn shellCmdFromJS(
    const last = string_iter.len -| 1;
    while (string_iter.next()) |js_value| {
        defer i += 1;
-        if (!try appendJSValueStr(globalThis, js_value, out_script, false)) {
+        if (!try builder.appendJSValueStr(js_value, false)) {
            globalThis.throw("Shell script string contains invalid UTF-16", .{});
            return false;
        }
@@ -2468,7 +2662,7 @@ pub fn shellCmdFromJS(
        // try script.appendSlice(str.full());
        if (i < last) {
            const template_value = template_args[i];
-            if (!(try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsobjref_buf[0..]))) return false;
+            if (!(try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsstrings, jsobjref_buf[0..]))) return false;
        }
    }
    return true;
@@ -2479,8 +2673,10 @@ pub fn handleTemplateValue(
    template_value: JSValue,
    out_jsobjs: *std.ArrayList(JSValue),
    out_script: *std.ArrayList(u8),
+    jsstrings: *std.ArrayList(bun.String),
    jsobjref_buf: []u8,
 ) !bool {
+    var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
    if (!template_value.isEmpty()) {
        if (template_value.asArrayBuffer(globalThis)) |array_buffer| {
            _ = array_buffer;
@@ -2497,7 +2693,7 @@ pub fn handleTemplateValue(
                if (store.data == .file) {
                    if (store.data.file.pathlike == .path) {
                        const path = store.data.file.pathlike.path.slice();
-                        if (!try appendUTF8Text(path, out_script, true)) {
+                        if (!try builder.appendUTF8(path, true)) {
                            globalThis.throw("Shell script string contains invalid UTF-16", .{});
                            return false;
                        }
@@ -2537,7 +2733,7 @@ pub fn handleTemplateValue(
        }

        if (template_value.isString()) {
-            if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
+            if (!try builder.appendJSValueStr(template_value, true)) {
                globalThis.throw("Shell script string contains invalid UTF-16", .{});
                return false;
            }
@@ -2549,10 +2745,10 @@ pub fn handleTemplateValue(
            const last = array.len -| 1;
            var i: u32 = 0;
            while (array.next()) |arr| : (i += 1) {
-                if (!(try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsobjref_buf))) return false;
+                if (!(try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsstrings, jsobjref_buf))) return false;
                if (i < last) {
-                    const str = bun.String.init(" ");
-                    if (!try appendBunStr(str, out_script, false)) return false;
+                    const str = bun.String.static(" ");
+                    if (!try builder.appendBunStr(str, false)) return false;
                }
            }
            return true;
@@ -2562,7 +2758,7 @@ pub fn handleTemplateValue(
            if (template_value.getTruthy(globalThis, "raw")) |maybe_str| {
                const bunstr = maybe_str.toBunString(globalThis);
                defer bunstr.deref();
-                if (!try appendBunStr(bunstr, out_script, false)) {
+                if (!try builder.appendBunStr(bunstr, false)) {
                    globalThis.throw("Shell script string contains invalid UTF-16", .{});
                    return false;
                }
@@ -2571,7 +2767,7 @@ pub fn handleTemplateValue(
        }

        if (template_value.isPrimitive()) {
-            if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
+            if (!try builder.appendJSValueStr(template_value, true)) {
                globalThis.throw("Shell script string contains invalid UTF-16", .{});
                return false;
            }
@@ -2579,7 +2775,7 @@ pub fn handleTemplateValue(
        }

        if (template_value.implementsToString(globalThis)) {
-            if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
+            if (!try builder.appendJSValueStr(template_value, true)) {
                globalThis.throw("Shell script string contains invalid UTF-16", .{});
                return false;
            }
@@ -2593,57 +2789,127 @@ pub fn handleTemplateValue(
    return true;
 }

-/// This will disallow invalid surrogate pairs
-pub fn appendJSValueStr(globalThis: *JSC.JSGlobalObject, jsval: JSValue, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
-    const bunstr = jsval.toBunString(globalThis);
-    defer bunstr.deref();
+pub const ShellSrcBuilder = struct {
+    globalThis: *JSC.JSGlobalObject,
+    outbuf: *std.ArrayList(u8),
+    jsstrs_to_escape: *std.ArrayList(bun.String),
+    jsstr_ref_buf: [128]u8 = [_]u8{0} ** 128,

-    return try appendBunStr(bunstr, outbuf, allow_escape);
-}
-
-pub fn appendUTF8Text(slice: []const u8, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
-    if (!bun.simdutf.validate.utf8(slice)) {
-        return false;
+    /// Builds a `ShellSrcBuilder` that writes script text into `outbuf` and
+    /// records strings in `jsstrs_to_escape`. Only the pointers are stored;
+    /// `jsstr_ref_buf` keeps its zero-filled default.
+    pub fn init(
+        globalThis: *JSC.JSGlobalObject,
+        outbuf: *std.ArrayList(u8),
+        jsstrs_to_escape: *std.ArrayList(bun.String),
+    ) ShellSrcBuilder {
+        const builder: ShellSrcBuilder = .{
+            .globalThis = globalThis,
+            .outbuf = outbuf,
+            .jsstrs_to_escape = jsstrs_to_escape,
+        };
+        return builder;
    }

-    if (allow_escape and needsEscape(slice)) {
-        try escape(slice, outbuf);
-    } else {
-        try outbuf.appendSlice(slice);
+    /// Converts `jsval` to a `bun.String` and appends it via `appendBunStr`.
+    /// The temporary string is dereferenced when this function exits.
+    /// Returns whatever `appendBunStr` returns.
+    pub fn appendJSValueStr(this: *ShellSrcBuilder, jsval: JSValue, comptime allow_escape: bool) !bool {
+        const converted = jsval.toBunString(this.globalThis);
+        defer converted.deref();
+
+        const appended = try this.appendBunStr(converted, allow_escape);
+        return appended;
    }

-    return true;
-}
-
-pub fn appendBunStr(bunstr: bun.String, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
-    const str = bunstr.toUTF8WithoutRef(bun.default_allocator);
-    defer str.deinit();
-
-    // TODO: toUTF8 already validates. We shouldn't have to do this twice!
-    const is_ascii = str.isAllocated();
-    if (!is_ascii and !bun.simdutf.validate.utf8(str.slice())) {
-        return false;
+    /// Appends the contents of `bunstr` to the script buffer.
+    ///
+    /// Returns `false`, writing nothing, when a UTF-16 or UTF-8 string fails
+    /// validation. When `allow_escape` is set and `needsEscapeBunstr` flags the
+    /// string, only a placeholder is written through `appendJSStrRef`.
+    /// Otherwise the text is appended according to its encoding: UTF-16,
+    /// UTF-8/ASCII, or Latin-1.
+    pub fn appendBunStr(
+        this: *ShellSrcBuilder,
+        bunstr: bun.String,
+        comptime allow_escape: bool,
+    ) !bool {
+        // Reject malformed input before touching the output buffer.
+        if (bunstr.isUTF16() and !bun.simdutf.validate.utf16le(bunstr.utf16())) return false;
+        if (bunstr.isUTF8() and !bun.simdutf.validate.utf8(bunstr.byteSlice())) return false;
+
+        if (allow_escape and needsEscapeBunstr(bunstr)) {
+            try this.appendJSStrRef(bunstr);
+            return true;
+        }
+
+        if (bunstr.isUTF16()) {
+            try this.appendUTF16Impl(bunstr.utf16());
+        } else if (bunstr.isUTF8() or bun.strings.isAllASCII(bunstr.byteSlice())) {
+            try this.appendUTF8Impl(bunstr.byteSlice());
+        } else {
+            try this.appendLatin1Impl(bunstr.byteSlice());
+        }
+        return true;
    }

-    if (allow_escape and needsEscape(str.slice())) {
-        try escape(str.slice(), outbuf);
-    } else {
-        try outbuf.appendSlice(str.slice());
+    /// Appends the bytes in `utf8` to the script buffer.
+    ///
+    /// Returns `false`, writing nothing, when `utf8` fails UTF-8 validation.
+    /// When `allow_escape` is set and the text contains a character that may
+    /// need escaping, a temporary `bun.String` is created from it and handed to
+    /// `appendJSStrRef`, which writes a placeholder instead of the text.
+    pub fn appendUTF8(this: *ShellSrcBuilder, utf8: []const u8, comptime allow_escape: bool) !bool {
+        // `validate.utf8` reports validity, so the local is named for what it holds.
+        const is_valid = bun.simdutf.validate.utf8(utf8);
+        if (!is_valid) return false;
+        if (allow_escape) {
+            if (needsEscapeUtf8AsciiLatin1(utf8)) {
+                const bunstr = bun.String.createUTF8(utf8);
+                // appendJSStrRef takes its own reference; release ours on exit.
+                defer bunstr.deref();
+                try this.appendJSStrRef(bunstr);
+                return true;
+            }
+        }
+
+        try this.appendUTF8Impl(utf8);
+        return true;
    }

-    return true;
-}
+    /// Transcodes `utf16` to UTF-8 and appends it to the script buffer,
+    /// reserving the computed UTF-8 length before converting.
+    pub fn appendUTF16Impl(this: *ShellSrcBuilder, utf16: []const u16) !void {
+        const utf8_len = bun.simdutf.simdutf__utf8_length_from_utf16le(utf16.ptr, utf16.len);
+        try this.outbuf.ensureUnusedCapacity(utf8_len);
+        return bun.strings.convertUTF16ToUTF8Append(this.outbuf, utf16);
+    }
+
+    /// Appends `utf8` to the script buffer byte-for-byte. No validation or
+    /// escaping happens here.
+    pub fn appendUTF8Impl(this: *ShellSrcBuilder, utf8: []const u8) !void {
+        return this.outbuf.appendSlice(utf8);
+    }
+
+    /// Appends Latin-1 text to the script buffer as UTF-8.
+    ///
+    /// The leading ASCII run (if any) is copied as-is. Only the remainder,
+    /// starting at the first non-ASCII byte, is passed to the Latin-1 converter.
+    pub fn appendLatin1Impl(this: *ShellSrcBuilder, latin1: []const u8) !void {
+        // With no non-ASCII byte the index falls back to 0, so the whole
+        // slice goes through the converter below.
+        const non_ascii_idx = bun.strings.firstNonASCII(latin1) orelse 0;
+
+        if (non_ascii_idx > 0) {
+            try this.appendUTF8Impl(latin1[0..non_ascii_idx]);
+        }
+
+        // Convert only the part not appended above. Passing the full slice
+        // here would emit the ASCII prefix a second time.
+        // NOTE(review): assumes the helper converts every byte of the slice it
+        // is given, writing at the supplied offset - confirm against its definition.
+        this.outbuf.* = try bun.strings.allocateLatin1IntoUTF8WithList(this.outbuf.*, this.outbuf.items.len, []const u8, latin1[non_ascii_idx..]);
+    }
+
+    /// Writes a placeholder for `bunstr` into the script buffer and records the
+    /// string in `jsstrs_to_escape`.
+    ///
+    /// The placeholder is `LEX_JS_STRING_PREFIX` followed by the index the
+    /// string will occupy in `jsstrs_to_escape`. A reference is taken on
+    /// `bunstr` for the recorded entry and released again if recording fails.
+    pub fn appendJSStrRef(this: *ShellSrcBuilder, bunstr: bun.String) !void {
+        const idx = this.jsstrs_to_escape.items.len;
+        const str = std.fmt.bufPrint(this.jsstr_ref_buf[0..], "{s}{d}", .{ LEX_JS_STRING_PREFIX, idx }) catch {
+            @panic("Impossible");
+        };
+        try this.outbuf.appendSlice(str);
+        bunstr.ref();
+        // If the append below fails, nothing in the list holds this
+        // reference, so drop it here to avoid leaking it.
+        errdefer bunstr.deref();
+        try this.jsstrs_to_escape.append(bunstr);
+    }
+};

 /// Characters that need to escaped
-const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ' };
+const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ', '\'' };
 /// Characters that need to be backslashed inside double quotes
 const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };

-/// assumes WTF-8
-pub fn escape(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
+/// Escapes `bunstr` into `outbuf`, picking the escaper by string encoding.
+/// `add_quotes` is forwarded unchanged. Returns `false` only when the UTF-16
+/// escaper does; the other two paths always return `true`.
+pub fn escapeBunStr(bunstr: bun.String, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !bool {
+    if (bunstr.is8Bit()) {
+        // latin-1 or ascii
+        try escape8Bit(bunstr.byteSlice(), outbuf, add_quotes);
+    } else if (bunstr.isUTF16()) {
+        return try escapeUtf16(bunstr.utf16(), outbuf, add_quotes);
+    } else {
+        // Otherwise is utf-8
+        try escapeWTF8(bunstr.byteSlice(), outbuf, add_quotes);
+    }
+    return true;
+}
+
+/// works for latin-1 and ascii
+pub fn escape8Bit(str: []const u8, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !void {
    try outbuf.ensureUnusedCapacity(str.len);

-    try outbuf.append('\"');
+    if (add_quotes) try outbuf.append('\"');

    loop: for (str) |c| {
        inline for (BACKSLASHABLE_CHARS) |spc| {
@@ -2658,15 +2924,15 @@ pub fn escape(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
        try outbuf.append(c);
    }

-    try outbuf.append('\"');
+    if (add_quotes) try outbuf.append('\"');
 }

-pub fn escapeUnicode(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
+pub fn escapeWTF8(str: []const u8, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !void {
    try outbuf.ensureUnusedCapacity(str.len);

    var bytes: [8]u8 = undefined;
-    var n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
-    try outbuf.appendSlice(bytes[0..n]);
+    var n: u3 = if (add_quotes) bun.strings.encodeWTF8Rune(bytes[0..4], '"') else 0;
+    if (add_quotes) try outbuf.appendSlice(bytes[0..n]);

    loop: for (str) |c| {
        inline for (BACKSLASHABLE_CHARS) |spc| {
@@ -2686,18 +2952,84 @@ pub fn escapeUnicode(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
        try outbuf.appendSlice(bytes[0..n]);
    }

-    n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
-    try outbuf.appendSlice(bytes[0..n]);
+    if (add_quotes) {
+        n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
+        try outbuf.appendSlice(bytes[0..n]);
+    }
+}
+
+/// Escapes UTF-16 text into `outbuf`, encoding each code point with
+/// `encodeWTF8RuneT`.
+///
+/// Every character listed in `BACKSLASHABLE_CHARS` is written with a leading
+/// backslash. When `add_quotes` is set the output is wrapped in double quotes.
+/// Returns `false` if `utf16Codepoint` reports a decoding failure; anything
+/// already appended to `outbuf` at that point is left in place.
+pub fn escapeUtf16(str: []const u16, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !bool {
+    if (add_quotes) try outbuf.append('"');
+
+    // Units before this index are ASCII and need no surrogate decoding.
+    const non_ascii = bun.strings.firstNonASCII16([]const u16, str) orelse 0;
+    var cp_buf: [4]u8 = undefined;
+
+    var i: usize = 0;
+    loop: while (i < str.len) {
+        const char: u32 = brk: {
+            if (i < non_ascii) {
+                // Read the current unit before advancing. Advancing first
+                // skips str[0] and handles the first non-ASCII unit twice.
+                const unit = str[i];
+                i += 1;
+                break :brk unit;
+            }
+            const ret = bun.strings.utf16Codepoint([]const u16, str[i..]);
+            if (ret.fail) return false;
+            i += ret.len;
+            break :brk ret.code_point;
+        };
+
+        inline for (BACKSLASHABLE_CHARS) |bchar| {
+            if (@as(u32, @intCast(bchar)) == char) {
+                try outbuf.appendSlice(&[_]u8{ '\\', @intCast(char) });
+                continue :loop;
+            }
+        }
+
+        const len = bun.strings.encodeWTF8RuneT(&cp_buf, u32, char);
+        try outbuf.appendSlice(cp_buf[0..len]);
+    }
+    if (add_quotes) try outbuf.append('"');
+    return true;
+}
+
+/// Reports whether `bunstr` may need escaping, choosing the UTF-16 scan or the
+/// byte scan based on the string's encoding.
+pub fn needsEscapeBunstr(bunstr: bun.String) bool {
+    // Anything that is not UTF-16 is utf-8, ascii, or latin-1.
+    return if (bunstr.isUTF16())
+        needsEscapeUTF16(bunstr.utf16())
+    else
+        needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
+}
+
+/// Scalar scan over UTF-16 code units: returns `true` as soon as one unit
+/// equals any entry of `SPECIAL_CHARS`, otherwise `false`.
+pub fn needsEscapeUTF16Slow(str: []const u16) bool {
+    var idx: usize = 0;
+    while (idx < str.len) : (idx += 1) {
+        const unit = str[idx];
+        inline for (SPECIAL_CHARS) |special| {
+            if (unit == @as(u16, special)) return true;
+        }
+    }
+
+    return false;
 }

+/// Returns `true` when any UTF-16 code unit in `str` equals one of
+/// `SPECIAL_CHARS`. Inputs shorter than 64 units use the scalar scan; longer
+/// ones are compared eight units at a time, and the scalar scan handles
+/// whatever is left over at the end.
 pub fn needsEscapeUTF16(str: []const u16) bool {
-    for (str) |char| {
-        switch (char) {
-            '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ' => return true,
-            else => {},
+    if (str.len < 64) return needsEscapeUTF16Slow(str);
+
+    // One vector per special character, each lane holding that character;
+    // built at compile time.
+    const needles = comptime brk: {
+        var needles: [SPECIAL_CHARS.len]@Vector(8, u16) = undefined;
+        for (SPECIAL_CHARS, 0..) |c, i| {
+            needles[i] = @splat(@as(u16, @intCast(c)));
+        }
+        break :brk needles;
+    };
+
+    var i: usize = 0;
+    // Only full groups of eight units are handled in this loop.
+    while (i + 8 <= str.len) : (i += 8) {
+        const haystack: @Vector(8, u16) = str[i..][0..8].*;
+
+        inline for (needles) |needle| {
+            // Lane-wise equality; any matching lane means a special character.
+            const result = haystack == needle;
+            if (std.simd.firstTrue(result) != null) return true;
        }
    }

+    // Fewer than eight units remain; finish with the scalar scan.
+    if (i < str.len) return needsEscapeUTF16Slow(str[i..]);
+
    return false;
 }

@@ -2705,8 +3037,8 @@ pub fn needsEscapeUTF16(str: []const u16) bool {
 /// indicates the *possibility* that the string must be escaped, so it can have
 /// false positives, but it is faster than running the shell lexer through the
 /// input string for a more correct implementation.
-pub fn needsEscape(str: []const u8) bool {
-    if (str.len < 128) return needsEscapeSlow(str);
+pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
+    if (str.len < 128) return needsEscapeUtf8AsciiLatin1Slow(str);

    const needles = comptime brk: {
        var needles: [SPECIAL_CHARS.len]@Vector(16, u8) = undefined;
@@ -2726,12 +3058,12 @@ pub fn needsEscape(str: []const u8) bool {
        }
    }

-    if (i < str.len) return needsEscapeSlow(str[i..]);
+    if (i < str.len) return needsEscapeUtf8AsciiLatin1Slow(str[i..]);

    return false;
 }

-pub fn needsEscapeSlow(str: []const u8) bool {
+pub fn needsEscapeUtf8AsciiLatin1Slow(str: []const u8) bool {
    for (str) |c| {
        inline for (SPECIAL_CHARS) |spc| {
            if (spc == c) return true;