Fix backtick escaping and add more tests (#10980)

Co-authored-by: zackradisic <zackradisic@users.noreply.github.com> Co-authored-by: Georgijs <48869301+gvilums@users.noreply.github.com> Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
2026-02-09 10:28:47 +00:00 · 2024-05-14 23:23:12 +02:00
parent 8fbdf32d74
commit 60482b6e42
3 changed files with 108 additions and 62 deletions
--- a/src/shell/shell.zig
+++ b/src/shell/shell.zig
@@ -2345,6 +2345,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                    switch (char) {
                        // possibly double bracket open
                        '[' => {
+                            comptime assertSpecialChar('[');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.peek()) |p| {
                                if (p.escaped or p.char != '[') break :escaped;
@@ -2371,6 +2373,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            break :escaped;
                        },
                        ']' => {
+                            comptime assertSpecialChar(']');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.peek()) |p| {
                                if (p.escaped or p.char != ']') break :escaped;
@@ -2398,6 +2402,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                        },

                        '#' => {
+                            comptime assertSpecialChar('#');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            const whitespace_preceding =
                                if (self.chars.prev) |prev|
@@ -2410,12 +2416,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            continue;
                        },
                        ';' => {
+                            comptime assertSpecialChar(';');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);
                            try self.tokens.append(.Semicolon);
                            continue;
                        },
                        '\n' => {
+                            comptime assertSpecialChar('\n');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word_impl(true, true, false);
                            try self.tokens.append(.Newline);
@@ -2424,6 +2434,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // glob asterisks
                        '*' => {
+                            comptime assertSpecialChar('*');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.peek()) |next| {
                                if (!next.escaped and next.char == '*') {
@@ -2440,18 +2452,24 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // brace expansion syntax
                        '{' => {
+                            comptime assertSpecialChar('{');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.BraceBegin);
                            continue;
                        },
                        ',' => {
+                            comptime assertSpecialChar(',');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.Comma);
                            continue;
                        },
                        '}' => {
+                            comptime assertSpecialChar('}');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(false);
                            try self.tokens.append(.BraceEnd);
@@ -2460,6 +2478,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // Command substitution
                        '`' => {
+                            comptime assertSpecialChar('`');
+
                            if (self.chars.state == .Single) break :escaped;
                            if (self.in_subshell == .backtick) {
                                try self.break_word(true);
@@ -2474,6 +2494,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                        },
                        // Command substitution/vars
                        '$' => {
+                            comptime assertSpecialChar('$');
+
                            if (self.chars.state == .Single) break :escaped;

                            const peeked = self.peek() orelse InputChar{ .char = 0 };
@@ -2509,12 +2531,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            continue;
                        },
                        '(' => {
+                            comptime assertSpecialChar('(');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);
                            try self.eat_subshell(.normal);
                            continue;
                        },
                        ')' => {
+                            comptime assertSpecialChar(')');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            if (self.in_subshell != .dollar and self.in_subshell != .normal) {
                                self.add_error("Unexpected ')'");
@@ -2544,6 +2570,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                        },

                        '0'...'9' => {
+                            comptime for ('0'..('9' + 1)) |c| assertSpecialChar(c); // for-range end is exclusive, so go one past '9'
+
                            if (self.chars.state != .Normal) break :escaped;
                            const snapshot = self.make_snapshot();
                            if (self.eat_redirect(input)) |redirect| {
@@ -2557,6 +2585,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // Operators
                        '|' => {
+                            comptime assertSpecialChar('|');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);

@@ -2577,6 +2607,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            continue;
                        },
                        '>' => {
+                            comptime assertSpecialChar('>');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word_impl(true, false, true);
                            const redirect = self.eat_simple_redirect(.out);
@@ -2584,6 +2616,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            continue;
                        },
                        '<' => {
+                            comptime assertSpecialChar('<');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word_impl(true, false, true);
                            const redirect = self.eat_simple_redirect(.in);
@@ -2591,6 +2625,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            continue;
                        },
                        '&' => {
+                            comptime assertSpecialChar('&');
+
                            if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
                            try self.break_word(true);

@@ -2619,6 +2655,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // 2. State switchers
                        '\'' => {
+                            comptime assertSpecialChar('\'');
+
                            if (self.chars.state == .Single) {
                                self.chars.state = .Normal;
                                continue;
@@ -2630,6 +2668,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
                            break :escaped;
                        },
                        '"' => {
+                            comptime assertSpecialChar('"');
+
                            if (self.chars.state == .Single) break :escaped;
                            if (self.chars.state == .Normal) {
                                try self.break_word(false);
@@ -2644,6 +2684,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

                        // 3. Word breakers
                        ' ' => {
+                            comptime assertSpecialChar(' ');
+
                            if (self.chars.state == .Normal) {
                                try self.break_word_impl(true, true, false);
                                continue;
@@ -3961,7 +4003,18 @@ pub const ShellSrcBuilder = struct {
 };

 /// Characters that need to escaped
-const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ', '\'' };
+const SPECIAL_CHARS = [_]u8{ '~', '[', ']', '#', ';', '\n', '*', '{', ',', '}', '`', '$', '=', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '|', '>', '<', '&', '\'', '"', ' ', '\\' };
+const SPECIAL_CHARS_TABLE: std.bit_set.IntegerBitSet(256) = brk: {
+    var table = std.bit_set.IntegerBitSet(256).initEmpty();
+    for (SPECIAL_CHARS) |c| {
+        table.set(c);
+    }
+    break :brk table;
+};
+pub fn assertSpecialChar(c: u8) void {
+    comptime bun.assert(@inComptime());
+    bun.assert(SPECIAL_CHARS_TABLE.isSet(c));
+}
 /// Characters that need to be backslashed inside double quotes
 const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };

@@ -4070,39 +4123,11 @@ pub fn needsEscapeBunstr(bunstr: bun.String) bool {
    return needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
 }

-pub fn needsEscapeUTF16Slow(str: []const u16) bool {
-    for (str) |codeunit| {
-        inline for (SPECIAL_CHARS) |spc| {
-            if (@as(u16, @intCast(spc)) == codeunit) return true;
-        }
-    }
-
-    return false;
-}
-
 pub fn needsEscapeUTF16(str: []const u16) bool {
-    if (str.len < 64) return needsEscapeUTF16Slow(str);
-
-    const needles = comptime brk: {
-        var needles: [SPECIAL_CHARS.len]@Vector(8, u16) = undefined;
-        for (SPECIAL_CHARS, 0..) |c, i| {
-            needles[i] = @splat(@as(u16, @intCast(c)));
-        }
-        break :brk needles;
-    };
-
-    var i: usize = 0;
-    while (i + 8 <= str.len) : (i += 8) {
-        const haystack: @Vector(8, u16) = str[i..][0..8].*;
-
-        inline for (needles) |needle| {
-            const result = haystack == needle;
-            if (std.simd.firstTrue(result) != null) return true;
-        }
+    for (str) |codeunit| {
+        if (codeunit <= 0xff and SPECIAL_CHARS_TABLE.isSet(codeunit)) return true; // table has 256 entries; only code units 0..=0xff can index it
     }
 
-    if (i < str.len) return needsEscapeUTF16Slow(str[i..]);
-
     return false;
 }

@@ -4111,36 +4136,8 @@ pub fn needsEscapeUTF16(str: []const u16) bool {
 /// false positives, but it is faster than running the shell lexer through the
 /// input string for a more correct implementation.
 pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
-    if (str.len < 128) return needsEscapeUtf8AsciiLatin1Slow(str);
-
-    const needles = comptime brk: {
-        var needles: [SPECIAL_CHARS.len]@Vector(16, u8) = undefined;
-        for (SPECIAL_CHARS, 0..) |c, i| {
-            needles[i] = @splat(c);
-        }
-        break :brk needles;
-    };
-
-    var i: usize = 0;
-    while (i + 16 <= str.len) : (i += 16) {
-        const haystack: @Vector(16, u8) = str[i..][0..16].*;
-
-        inline for (needles) |needle| {
-            const result = haystack == needle;
-            if (std.simd.firstTrue(result) != null) return true;
-        }
-    }
-
-    if (i < str.len) return needsEscapeUtf8AsciiLatin1Slow(str[i..]);
-
-    return false;
-}
-
-pub fn needsEscapeUtf8AsciiLatin1Slow(str: []const u8) bool {
    for (str) |c| {
-        inline for (SPECIAL_CHARS) |spc| {
-            if (spc == c) return true;
-        }
+        if (SPECIAL_CHARS_TABLE.isSet(c)) return true;
    }
    return false;
 }
--- a/test/js/bun/shell/bunshell.test.ts
+++ b/test/js/bun/shell/bunshell.test.ts
@@ -742,7 +742,56 @@ ${temp_dir}`
  /**
   *
   */
-  describe("escaping", () => {});
+  describe("escaping", () => {
+    // Unquoted interpolation: a JS string holding shell-special characters must be echoed back literally
+    TestBuilder.command`echo ${"$"}`.stdout("$\n").runAsTest("dollar");
+    TestBuilder.command`echo ${">"}`.stdout(">\n").runAsTest("right_arrow");
+    TestBuilder.command`echo ${"&"}`.stdout("&\n").runAsTest("ampersand");
+    TestBuilder.command`echo ${"|"}`.stdout("|\n").runAsTest("pipe");
+    TestBuilder.command`echo ${"="}`.stdout("=\n").runAsTest("equals");
+    TestBuilder.command`echo ${";"}`.stdout(";\n").runAsTest("semicolon");
+    TestBuilder.command`echo ${"\n"}`.stdout("\n\n").runAsTest("newline");
+    TestBuilder.command`echo ${"{"}`.stdout("{\n").runAsTest("left_brace");
+    TestBuilder.command`echo ${"}"}`.stdout("}\n").runAsTest("right_brace");
+    TestBuilder.command`echo ${","}`.stdout(",\n").runAsTest("comma");
+    TestBuilder.command`echo ${"("}`.stdout("(\n").runAsTest("left_parenthesis");
+    TestBuilder.command`echo ${")"}`.stdout(")\n").runAsTest("right_parenthesis");
+    TestBuilder.command`echo ${"\\"}`.stdout("\\\n").runAsTest("backslash");
+    TestBuilder.command`echo ${" "}`.stdout(" \n").runAsTest("space");
+    TestBuilder.command`echo ${"'hello'"}`.stdout("'hello'\n").runAsTest("single_quote");
+    TestBuilder.command`echo ${'"hello"'}`.stdout('"hello"\n').runAsTest("double_quote");
+    TestBuilder.command`echo ${"`hello`"}`.stdout("`hello`\n").runAsTest("backtick");
+
+    // Same check with the interpolation placed inside double quotes
+    TestBuilder.command`echo "${"$"}"`.stdout("$\n").runAsTest("dollar_in_dquotes");
+    TestBuilder.command`echo "${"`"}"`.stdout("`\n").runAsTest("backtick_in_dquotes");
+    TestBuilder.command`echo "${'"'}"`.stdout('"\n').runAsTest("double_quote_in_dquotes");
+    TestBuilder.command`echo "${"\\"}"`.stdout("\\\n").runAsTest("backslash_in_dquotes");
+
+    // Same check with the interpolation placed inside single quotes
+    TestBuilder.command`echo '${"$"}'`.stdout("$\n").runAsTest("dollar_in_squotes");
+    TestBuilder.command`echo '${'"'}'`.stdout('"\n').runAsTest("double_quote_in_squotes");
+    TestBuilder.command`echo '${"`"}'`.stdout("`\n").runAsTest("backtick_in_squotes");
+    TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("backslash_in_squotes");
+
+    // Interpolated backslashes inside single quotes stay literal (NOTE: the second case repeats backslash_in_squotes)
+    TestBuilder.command`echo '${"\\"}'`.stdout("\\\n").runAsTest("literal_backslash_single_quote");
+    TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("double_backslash_single_quote");
+
+    // Edge cases with mixed quotes (NOTE: the \${ in the first case is an escaped sequence, not a JS interpolation)
+    TestBuilder.command`echo "'\${"$"}'"`.stdout("'${$}'\n").runAsTest("mixed_quotes_dollar");
+    TestBuilder.command`echo '"${"`"}"'`.stdout('"`"\n').runAsTest("mixed_quotes_backtick");
+
+    // Command-like strings interpolated as one value must be printed verbatim, not split, redirected or substituted
+    TestBuilder.command`echo ${"hello; echo world"}`.stdout("hello; echo world\n").runAsTest("compound_command");
+    TestBuilder.command`echo ${"hello > world"}`.stdout("hello > world\n").runAsTest("redirect_in_echo");
+    TestBuilder.command`echo ${"$(echo nested)"}`.stdout("$(echo nested)\n").runAsTest("nested_command_substitution");
+
+    // Several special characters (redirect, separator, command substitution) combined in one interpolated value
+    TestBuilder.command`echo ${"complex > command; $(execute)"}`
+      .stdout("complex > command; $(execute)\n")
+      .runAsTest("complex_mixed_special_chars");
+  });
 });

 describe("deno_task", () => {
--- a/test/js/bun/shell/env.positionals.test.ts
+++ b/test/js/bun/shell/env.positionals.test.ts
@@ -9,7 +9,7 @@ $.nothrow();
 describe("$ argv", async () => {
  for (let i = 0; i < process.argv.length; i++) {
    const element = process.argv[i];
-    TestBuilder.command`echo $${i}`
+    TestBuilder.command`echo $${{ raw: i }}`
      .exitCode(0)
      .stdout(process.argv[i] + "\n")
      .runAsTest(`$${i} should equal process.argv[${i}]`);