mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
Fix backtick escaping and add more tests (#10980)
Co-authored-by: zackradisic <zackradisic@users.noreply.github.com> Co-authored-by: Georgijs <48869301+gvilums@users.noreply.github.com> Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
This commit is contained in:
@@ -2345,6 +2345,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
switch (char) {
|
||||
// possibly double bracket open
|
||||
'[' => {
|
||||
comptime assertSpecialChar('[');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
if (self.peek()) |p| {
|
||||
if (p.escaped or p.char != '[') break :escaped;
|
||||
@@ -2371,6 +2373,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
break :escaped;
|
||||
},
|
||||
']' => {
|
||||
comptime assertSpecialChar(']');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
if (self.peek()) |p| {
|
||||
if (p.escaped or p.char != ']') break :escaped;
|
||||
@@ -2398,6 +2402,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
},
|
||||
|
||||
'#' => {
|
||||
comptime assertSpecialChar('#');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
const whitespace_preceding =
|
||||
if (self.chars.prev) |prev|
|
||||
@@ -2410,12 +2416,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
continue;
|
||||
},
|
||||
';' => {
|
||||
comptime assertSpecialChar(';');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(true);
|
||||
try self.tokens.append(.Semicolon);
|
||||
continue;
|
||||
},
|
||||
'\n' => {
|
||||
comptime assertSpecialChar('\n');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word_impl(true, true, false);
|
||||
try self.tokens.append(.Newline);
|
||||
@@ -2424,6 +2434,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// glob asterisks
|
||||
'*' => {
|
||||
comptime assertSpecialChar('*');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
if (self.peek()) |next| {
|
||||
if (!next.escaped and next.char == '*') {
|
||||
@@ -2440,18 +2452,24 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// brace expansion syntax
|
||||
'{' => {
|
||||
comptime assertSpecialChar('{');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(false);
|
||||
try self.tokens.append(.BraceBegin);
|
||||
continue;
|
||||
},
|
||||
',' => {
|
||||
comptime assertSpecialChar(',');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(false);
|
||||
try self.tokens.append(.Comma);
|
||||
continue;
|
||||
},
|
||||
'}' => {
|
||||
comptime assertSpecialChar('}');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(false);
|
||||
try self.tokens.append(.BraceEnd);
|
||||
@@ -2460,6 +2478,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// Command substitution
|
||||
'`' => {
|
||||
comptime assertSpecialChar('`');
|
||||
|
||||
if (self.chars.state == .Single) break :escaped;
|
||||
if (self.in_subshell == .backtick) {
|
||||
try self.break_word(true);
|
||||
@@ -2474,6 +2494,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
},
|
||||
// Command substitution/vars
|
||||
'$' => {
|
||||
comptime assertSpecialChar('$');
|
||||
|
||||
if (self.chars.state == .Single) break :escaped;
|
||||
|
||||
const peeked = self.peek() orelse InputChar{ .char = 0 };
|
||||
@@ -2509,12 +2531,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
continue;
|
||||
},
|
||||
'(' => {
|
||||
comptime assertSpecialChar('(');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(true);
|
||||
try self.eat_subshell(.normal);
|
||||
continue;
|
||||
},
|
||||
')' => {
|
||||
comptime assertSpecialChar(')');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
if (self.in_subshell != .dollar and self.in_subshell != .normal) {
|
||||
self.add_error("Unexpected ')'");
|
||||
@@ -2544,6 +2570,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
},
|
||||
|
||||
'0'...'9' => {
|
||||
comptime for ('0'..'9') |c| assertSpecialChar(c);
|
||||
|
||||
if (self.chars.state != .Normal) break :escaped;
|
||||
const snapshot = self.make_snapshot();
|
||||
if (self.eat_redirect(input)) |redirect| {
|
||||
@@ -2557,6 +2585,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// Operators
|
||||
'|' => {
|
||||
comptime assertSpecialChar('|');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(true);
|
||||
|
||||
@@ -2577,6 +2607,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
continue;
|
||||
},
|
||||
'>' => {
|
||||
comptime assertSpecialChar('>');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word_impl(true, false, true);
|
||||
const redirect = self.eat_simple_redirect(.out);
|
||||
@@ -2584,6 +2616,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
continue;
|
||||
},
|
||||
'<' => {
|
||||
comptime assertSpecialChar('<');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word_impl(true, false, true);
|
||||
const redirect = self.eat_simple_redirect(.in);
|
||||
@@ -2591,6 +2625,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
continue;
|
||||
},
|
||||
'&' => {
|
||||
comptime assertSpecialChar('&');
|
||||
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
try self.break_word(true);
|
||||
|
||||
@@ -2619,6 +2655,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// 2. State switchers
|
||||
'\'' => {
|
||||
comptime assertSpecialChar('\'');
|
||||
|
||||
if (self.chars.state == .Single) {
|
||||
self.chars.state = .Normal;
|
||||
continue;
|
||||
@@ -2630,6 +2668,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
break :escaped;
|
||||
},
|
||||
'"' => {
|
||||
comptime assertSpecialChar('"');
|
||||
|
||||
if (self.chars.state == .Single) break :escaped;
|
||||
if (self.chars.state == .Normal) {
|
||||
try self.break_word(false);
|
||||
@@ -2644,6 +2684,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
// 3. Word breakers
|
||||
' ' => {
|
||||
comptime assertSpecialChar(' ');
|
||||
|
||||
if (self.chars.state == .Normal) {
|
||||
try self.break_word_impl(true, true, false);
|
||||
continue;
|
||||
@@ -3961,7 +4003,18 @@ pub const ShellSrcBuilder = struct {
|
||||
};
|
||||
|
||||
/// Characters that need to be escaped
|
||||
const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ', '\'' };
|
||||
const SPECIAL_CHARS = [_]u8{ '~', '[', ']', '#', ';', '\n', '*', '{', ',', '}', '`', '$', '=', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '|', '>', '<', '&', '\'', '"', ' ', '\\' };
|
||||
/// Lookup table with one bit per possible byte value (256 bits).
/// Starts empty and has the bit set for every byte listed in `SPECIAL_CHARS`,
/// so membership can be tested with `SPECIAL_CHARS_TABLE.isSet(byte)`.
const SPECIAL_CHARS_TABLE: std.bit_set.IntegerBitSet(256) = brk: {
|
||||
var table = std.bit_set.IntegerBitSet(256).initEmpty();
|
||||
// Mark each special character's byte value in the table.
for (SPECIAL_CHARS) |c| {
|
||||
table.set(c);
|
||||
}
|
||||
break :brk table;
|
||||
};
|
||||
/// Asserts that `c` has its bit set in `SPECIAL_CHARS_TABLE`.
/// Also asserts `@inComptime()`, i.e. the function expects to be evaluated at
/// compile time (invoked as `comptime assertSpecialChar(c)`).
pub fn assertSpecialChar(c: u8) void {
|
||||
// Guard: this check is only meant to run during compile-time evaluation.
comptime bun.assert(@inComptime());
|
||||
bun.assert(SPECIAL_CHARS_TABLE.isSet(c));
|
||||
}
|
||||
/// Characters that need to be backslashed inside double quotes
|
||||
const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };
|
||||
|
||||
@@ -4070,39 +4123,11 @@ pub fn needsEscapeBunstr(bunstr: bun.String) bool {
|
||||
return needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
|
||||
}
|
||||
|
||||
/// Scans `str` one UTF-16 code unit at a time and returns `true` as soon as a
/// code unit equals any entry of `SPECIAL_CHARS` (each entry widened to `u16`).
/// Returns `false` when no code unit matches, including for an empty slice.
pub fn needsEscapeUTF16Slow(str: []const u16) bool {
|
||||
for (str) |codeunit| {
|
||||
// `inline for` unrolls the comparison against every special character.
inline for (SPECIAL_CHARS) |spc| {
|
||||
if (@as(u16, @intCast(spc)) == codeunit) return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn needsEscapeUTF16(str: []const u16) bool {
|
||||
if (str.len < 64) return needsEscapeUTF16Slow(str);
|
||||
|
||||
const needles = comptime brk: {
|
||||
var needles: [SPECIAL_CHARS.len]@Vector(8, u16) = undefined;
|
||||
for (SPECIAL_CHARS, 0..) |c, i| {
|
||||
needles[i] = @splat(@as(u16, @intCast(c)));
|
||||
}
|
||||
break :brk needles;
|
||||
};
|
||||
|
||||
var i: usize = 0;
|
||||
while (i + 8 <= str.len) : (i += 8) {
|
||||
const haystack: @Vector(8, u16) = str[i..][0..8].*;
|
||||
|
||||
inline for (needles) |needle| {
|
||||
const result = haystack == needle;
|
||||
if (std.simd.firstTrue(result) != null) return true;
|
||||
}
|
||||
for (str) |codeunit| {
|
||||
if (codeunit < 0xff and SPECIAL_CHARS_TABLE.isSet(codeunit)) return true;
|
||||
}
|
||||
|
||||
if (i < str.len) return needsEscapeUTF16Slow(str[i..]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -4111,36 +4136,8 @@ pub fn needsEscapeUTF16(str: []const u16) bool {
|
||||
/// false positives, but it is faster than running the shell lexer through the
|
||||
/// input string for a more correct implementation.
|
||||
pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
|
||||
if (str.len < 128) return needsEscapeUtf8AsciiLatin1Slow(str);
|
||||
|
||||
const needles = comptime brk: {
|
||||
var needles: [SPECIAL_CHARS.len]@Vector(16, u8) = undefined;
|
||||
for (SPECIAL_CHARS, 0..) |c, i| {
|
||||
needles[i] = @splat(c);
|
||||
}
|
||||
break :brk needles;
|
||||
};
|
||||
|
||||
var i: usize = 0;
|
||||
while (i + 16 <= str.len) : (i += 16) {
|
||||
const haystack: @Vector(16, u8) = str[i..][0..16].*;
|
||||
|
||||
inline for (needles) |needle| {
|
||||
const result = haystack == needle;
|
||||
if (std.simd.firstTrue(result) != null) return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < str.len) return needsEscapeUtf8AsciiLatin1Slow(str[i..]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn needsEscapeUtf8AsciiLatin1Slow(str: []const u8) bool {
|
||||
for (str) |c| {
|
||||
inline for (SPECIAL_CHARS) |spc| {
|
||||
if (spc == c) return true;
|
||||
}
|
||||
if (SPECIAL_CHARS_TABLE.isSet(c)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -742,7 +742,56 @@ ${temp_dir}`
|
||||
/**
|
||||
*
|
||||
*/
|
||||
describe("escaping", () => {});
|
||||
describe("escaping", () => {
|
||||
// Testing characters that need special handling when not quoted or in different contexts
|
||||
TestBuilder.command`echo ${"$"}`.stdout("$\n").runAsTest("dollar");
|
||||
TestBuilder.command`echo ${">"}`.stdout(">\n").runAsTest("right_arrow");
|
||||
TestBuilder.command`echo ${"&"}`.stdout("&\n").runAsTest("ampersand");
|
||||
TestBuilder.command`echo ${"|"}`.stdout("|\n").runAsTest("pipe");
|
||||
TestBuilder.command`echo ${"="}`.stdout("=\n").runAsTest("equals");
|
||||
TestBuilder.command`echo ${";"}`.stdout(";\n").runAsTest("semicolon");
|
||||
TestBuilder.command`echo ${"\n"}`.stdout("\n\n").runAsTest("newline");
|
||||
TestBuilder.command`echo ${"{"}`.stdout("{\n").runAsTest("left_brace");
|
||||
TestBuilder.command`echo ${"}"}`.stdout("}\n").runAsTest("right_brace");
|
||||
TestBuilder.command`echo ${","}`.stdout(",\n").runAsTest("comma");
|
||||
TestBuilder.command`echo ${"("}`.stdout("(\n").runAsTest("left_parenthesis");
|
||||
TestBuilder.command`echo ${")"}`.stdout(")\n").runAsTest("right_parenthesis");
|
||||
TestBuilder.command`echo ${"\\"}`.stdout("\\\n").runAsTest("backslash");
|
||||
TestBuilder.command`echo ${" "}`.stdout(" \n").runAsTest("space");
|
||||
TestBuilder.command`echo ${"'hello'"}`.stdout("'hello'\n").runAsTest("single_quote");
|
||||
TestBuilder.command`echo ${'"hello"'}`.stdout('"hello"\n').runAsTest("double_quote");
|
||||
TestBuilder.command`echo ${"`hello`"}`.stdout("`hello`\n").runAsTest("backtick");
|
||||
|
||||
// Testing characters that need to be escaped within double quotes
|
||||
TestBuilder.command`echo "${"$"}"`.stdout("$\n").runAsTest("dollar_in_dquotes");
|
||||
TestBuilder.command`echo "${"`"}"`.stdout("`\n").runAsTest("backtick_in_dquotes");
|
||||
TestBuilder.command`echo "${'"'}"`.stdout('"\n').runAsTest("double_quote_in_dquotes");
|
||||
TestBuilder.command`echo "${"\\"}"`.stdout("\\\n").runAsTest("backslash_in_dquotes");
|
||||
|
||||
// Testing characters that need to be escaped within single quotes
|
||||
TestBuilder.command`echo '${"$"}'`.stdout("$\n").runAsTest("dollar_in_squotes");
|
||||
TestBuilder.command`echo '${'"'}'`.stdout('"\n').runAsTest("double_quote_in_squotes");
|
||||
TestBuilder.command`echo '${"`"}'`.stdout("`\n").runAsTest("backtick_in_squotes");
|
||||
TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("backslash_in_squotes");
|
||||
|
||||
// Ensure that backslash escapes within single quotes are treated literally
|
||||
TestBuilder.command`echo '${"\\"}'`.stdout("\\\n").runAsTest("literal_backslash_single_quote");
|
||||
TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("double_backslash_single_quote");
|
||||
|
||||
// Edge cases with mixed quotes
|
||||
TestBuilder.command`echo "'\${"$"}'"`.stdout("'${$}'\n").runAsTest("mixed_quotes_dollar");
|
||||
TestBuilder.command`echo '"${"`"}"'`.stdout('"`"\n').runAsTest("mixed_quotes_backtick");
|
||||
|
||||
// Compound command with special characters
|
||||
TestBuilder.command`echo ${"hello; echo world"}`.stdout("hello; echo world\n").runAsTest("compound_command");
|
||||
TestBuilder.command`echo ${"hello > world"}`.stdout("hello > world\n").runAsTest("redirect_in_echo");
|
||||
TestBuilder.command`echo ${"$(echo nested)"}`.stdout("$(echo nested)\n").runAsTest("nested_command_substitution");
|
||||
|
||||
// Pathological cases involving multiple special characters
|
||||
TestBuilder.command`echo ${"complex > command; $(execute)"}`
|
||||
.stdout("complex > command; $(execute)\n")
|
||||
.runAsTest("complex_mixed_special_chars");
|
||||
});
|
||||
});
|
||||
|
||||
describe("deno_task", () => {
|
||||
|
||||
@@ -9,7 +9,7 @@ $.nothrow();
|
||||
describe("$ argv", async () => {
|
||||
for (let i = 0; i < process.argv.length; i++) {
|
||||
const element = process.argv[i];
|
||||
TestBuilder.command`echo $${i}`
|
||||
TestBuilder.command`echo $${{ raw: i }}`
|
||||
.exitCode(0)
|
||||
.stdout(process.argv[i] + "\n")
|
||||
.runAsTest(`$${i} should equal process.argv[${i}]`);
|
||||
|
||||
Reference in New Issue
Block a user