mirror of
https://github.com/oven-sh/bun
synced 2026-02-17 22:32:06 +00:00
Merge branch 'main' into jarred/process-change
This commit is contained in:
@@ -206,7 +206,7 @@ pub const EnvStr = packed struct {
|
||||
tag: Tag,
|
||||
len: usize = 0,
|
||||
|
||||
const print = bun.Output.scoped(.EnvStr, false);
|
||||
const print = bun.Output.scoped(.EnvStr, true);
|
||||
|
||||
const Tag = enum(u16) {
|
||||
/// Dealloced by reference counting
|
||||
@@ -841,9 +841,20 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
};
|
||||
|
||||
const template_args = callframe.argumentsPtr()[1..callframe.argumentsCount()];
|
||||
var stack_alloc = std.heap.stackFallback(@sizeOf(bun.String) * 4, arena.allocator());
|
||||
var jsstrings = std.ArrayList(bun.String).initCapacity(stack_alloc.get(), 4) catch {
|
||||
globalThis.throwOutOfMemory();
|
||||
return null;
|
||||
};
|
||||
defer {
|
||||
for (jsstrings.items[0..]) |bunstr| {
|
||||
bunstr.deref();
|
||||
}
|
||||
jsstrings.deinit();
|
||||
}
|
||||
var jsobjs = std.ArrayList(JSValue).init(arena.allocator());
|
||||
var script = std.ArrayList(u8).init(arena.allocator());
|
||||
if (!(bun.shell.shellCmdFromJS(globalThis, string_args, template_args, &jsobjs, &script) catch {
|
||||
if (!(bun.shell.shellCmdFromJS(globalThis, string_args, template_args, &jsobjs, &jsstrings, &script) catch {
|
||||
globalThis.throwOutOfMemory();
|
||||
return null;
|
||||
})) {
|
||||
@@ -856,6 +867,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
&arena,
|
||||
script.items[0..],
|
||||
jsobjs.items[0..],
|
||||
jsstrings.items[0..],
|
||||
&parser,
|
||||
&lex_result,
|
||||
) catch |err| {
|
||||
@@ -901,14 +913,21 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
return interpreter;
|
||||
}
|
||||
|
||||
pub fn parse(arena: *bun.ArenaAllocator, script: []const u8, jsobjs: []JSValue, out_parser: *?bun.shell.Parser, out_lex_result: *?shell.LexResult) !ast.Script {
|
||||
pub fn parse(
|
||||
arena: *bun.ArenaAllocator,
|
||||
script: []const u8,
|
||||
jsobjs: []JSValue,
|
||||
jsstrings_to_escape: []bun.String,
|
||||
out_parser: *?bun.shell.Parser,
|
||||
out_lex_result: *?shell.LexResult,
|
||||
) !ast.Script {
|
||||
const lex_result = brk: {
|
||||
if (bun.strings.isAllASCII(script)) {
|
||||
var lexer = bun.shell.LexerAscii.new(arena.allocator(), script);
|
||||
var lexer = bun.shell.LexerAscii.new(arena.allocator(), script, jsstrings_to_escape);
|
||||
try lexer.lex();
|
||||
break :brk lexer.get_result();
|
||||
}
|
||||
var lexer = bun.shell.LexerUnicode.new(arena.allocator(), script);
|
||||
var lexer = bun.shell.LexerUnicode.new(arena.allocator(), script, jsstrings_to_escape);
|
||||
try lexer.lex();
|
||||
break :brk lexer.get_result();
|
||||
};
|
||||
@@ -1028,7 +1047,14 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
const jsobjs: []JSValue = &[_]JSValue{};
|
||||
var out_parser: ?bun.shell.Parser = null;
|
||||
var out_lex_result: ?bun.shell.LexResult = null;
|
||||
const script = ThisInterpreter.parse(&arena, src, jsobjs, &out_parser, &out_lex_result) catch |err| {
|
||||
const script = ThisInterpreter.parse(
|
||||
&arena,
|
||||
src,
|
||||
jsobjs,
|
||||
&[_]bun.String{},
|
||||
&out_parser,
|
||||
&out_lex_result,
|
||||
) catch |err| {
|
||||
if (err == bun.shell.ParseError.Lex) {
|
||||
std.debug.assert(out_lex_result != null);
|
||||
const str = out_lex_result.?.combineErrors(arena.allocator());
|
||||
@@ -1074,7 +1100,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
const jsobjs: []JSValue = &[_]JSValue{};
|
||||
var out_parser: ?bun.shell.Parser = null;
|
||||
var out_lex_result: ?bun.shell.LexResult = null;
|
||||
const script = ThisInterpreter.parse(&arena, src, jsobjs, &out_parser, &out_lex_result) catch |err| {
|
||||
const script = ThisInterpreter.parse(&arena, src, jsobjs, &[_]bun.String{}, &out_parser, &out_lex_result) catch |err| {
|
||||
if (err == bun.shell.ParseError.Lex) {
|
||||
std.debug.assert(out_lex_result != null);
|
||||
const str = out_lex_result.?.combineErrors(arena.allocator());
|
||||
@@ -1156,6 +1182,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
|
||||
fn finish(this: *ThisInterpreter, exit_code: ExitCode) void {
|
||||
log("finish", .{});
|
||||
defer decrPendingActivityFlag(&this.has_pending_activity);
|
||||
if (comptime EventLoopKind == .js) {
|
||||
// defer this.deinit();
|
||||
// this.promise.resolve(this.global, JSValue.jsNumberFromInt32(@intCast(exit_code)));
|
||||
@@ -1169,6 +1196,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
|
||||
fn errored(this: *ThisInterpreter, the_error: ShellError) void {
|
||||
_ = the_error; // autofix
|
||||
defer decrPendingActivityFlag(&this.has_pending_activity);
|
||||
|
||||
if (comptime EventLoopKind == .js) {
|
||||
// defer this.deinit();
|
||||
@@ -1319,6 +1347,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
/// Finalizer exported with the C calling convention: logs the event and
/// then tears the interpreter down via `deinit`.
pub fn finalize(this: *ThisInterpreter) callconv(.C) void {
    log("Interpreter finalize", .{});
    this.deinit();
}
|
||||
|
||||
@@ -1360,12 +1389,12 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
|
||||
word_idx: u32,
|
||||
current_out: std.ArrayList(u8),
|
||||
state: enum {
|
||||
state: union(enum) {
|
||||
normal,
|
||||
braces,
|
||||
glob,
|
||||
done,
|
||||
err,
|
||||
err: bun.shell.ShellErr,
|
||||
},
|
||||
child_state: union(enum) {
|
||||
idle,
|
||||
@@ -1582,6 +1611,12 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
this.parent.childDone(this, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Parent will inspect the `this.state.err`
|
||||
if (this.state == .err) {
|
||||
this.parent.childDone(this, 1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fn transitionToGlobState(this: *Expansion) void {
|
||||
@@ -1589,10 +1624,23 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
this.child_state = .{ .glob = .{ .walker = .{} } };
|
||||
const pattern = this.current_out.items[0..];
|
||||
|
||||
switch (GlobWalker.init(&this.child_state.glob.walker, &arena, pattern, false, false, false, false, false) catch bun.outOfMemory()) {
|
||||
const cwd = this.base.shell.cwd();
|
||||
|
||||
switch (GlobWalker.initWithCwd(
|
||||
&this.child_state.glob.walker,
|
||||
&arena,
|
||||
pattern,
|
||||
cwd,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
) catch bun.outOfMemory()) {
|
||||
.result => {},
|
||||
.err => |e| {
|
||||
global_handle.get().actuallyThrow(bun.shell.ShellErr.newSys(e));
|
||||
this.state = .{ .err = bun.shell.ShellErr.newSys(e) };
|
||||
this.next();
|
||||
return;
|
||||
},
|
||||
}
|
||||
@@ -1803,6 +1851,19 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
}
|
||||
}
|
||||
|
||||
if (task.result.items.len == 0) {
|
||||
const msg = std.fmt.allocPrint(bun.default_allocator, "no matches found: {s}", .{this.child_state.glob.walker.pattern}) catch bun.outOfMemory();
|
||||
this.state = .{
|
||||
.err = bun.shell.ShellErr{
|
||||
.custom = msg,
|
||||
},
|
||||
};
|
||||
this.child_state.glob.walker.deinit(true);
|
||||
this.child_state = .idle;
|
||||
this.next();
|
||||
return;
|
||||
}
|
||||
|
||||
for (task.result.items) |sentinel_str| {
|
||||
// The string is allocated in the glob walker arena and will be freed, so needs to be duped here
|
||||
const duped = this.base.interpreter.allocator.dupeZ(u8, sentinel_str[0..sentinel_str.len]) catch bun.outOfMemory();
|
||||
@@ -2172,10 +2233,13 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
}
|
||||
|
||||
this.base.shell.deinit();
|
||||
bun.default_allocator.destroy(this);
|
||||
}
|
||||
|
||||
pub fn deinitFromInterpreter(this: *Script) void {
|
||||
this.base.shell.deinitImpl(false, false);
|
||||
// Let the interpreter deinitialize the shell state
|
||||
// this.base.shell.deinitImpl(false, false);
|
||||
bun.default_allocator.destroy(this);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2193,6 +2257,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
current_expansion_result: std.ArrayList([:0]const u8),
|
||||
expansion: Expansion,
|
||||
},
|
||||
err: bun.shell.ShellErr,
|
||||
done,
|
||||
},
|
||||
ctx: AssignCtx,
|
||||
@@ -2265,6 +2330,7 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
return;
|
||||
},
|
||||
.done => unreachable,
|
||||
.err => return this.parent.childDone(this, 1),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2272,9 +2338,14 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
}
|
||||
|
||||
pub fn childDone(this: *Assigns, child: ChildPtr, exit_code: ExitCode) void {
|
||||
_ = exit_code;
|
||||
|
||||
if (child.ptr.is(Expansion)) {
|
||||
const expansion = child.ptr.as(Expansion);
|
||||
if (exit_code != 0) {
|
||||
this.state = .{
|
||||
.err = expansion.state.err,
|
||||
};
|
||||
return;
|
||||
}
|
||||
var expanding = &this.state.expanding;
|
||||
|
||||
const label = this.node[expanding.idx].label;
|
||||
@@ -3182,9 +3253,16 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
}
|
||||
|
||||
pub fn childDone(this: *Cmd, child: ChildPtr, exit_code: ExitCode) void {
|
||||
_ = exit_code; // autofix
|
||||
|
||||
if (child.ptr.is(Assigns)) {
|
||||
if (exit_code != 0) {
|
||||
const err = this.state.expanding_assigns.state.err;
|
||||
defer err.deinit(bun.default_allocator);
|
||||
this.state.expanding_assigns.deinit();
|
||||
const buf = err.fmt();
|
||||
this.writeFailingError(buf, exit_code);
|
||||
return;
|
||||
}
|
||||
|
||||
this.state.expanding_assigns.deinit();
|
||||
this.state = .{
|
||||
.expanding_redirect = .{
|
||||
@@ -3196,6 +3274,18 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
}
|
||||
|
||||
if (child.ptr.is(Expansion)) {
|
||||
child.deinit();
|
||||
if (exit_code != 0) {
|
||||
const err = switch (this.state) {
|
||||
.expanding_redirect => this.state.expanding_redirect.expansion.state.err,
|
||||
.expanding_args => this.state.expanding_args.expansion.state.err,
|
||||
else => @panic("Invalid state"),
|
||||
};
|
||||
defer err.deinit(bun.default_allocator);
|
||||
const buf = err.fmt();
|
||||
this.writeFailingError(buf, exit_code);
|
||||
return;
|
||||
}
|
||||
this.next();
|
||||
return;
|
||||
}
|
||||
@@ -3537,7 +3627,10 @@ pub fn NewInterpreter(comptime EventLoopKind: JSC.EventLoopKind) type {
|
||||
this.exec = .none;
|
||||
}
|
||||
|
||||
if (!this.spawn_arena_freed) this.spawn_arena.deinit();
|
||||
if (!this.spawn_arena_freed) {
|
||||
log("Spawn arena free", .{});
|
||||
this.spawn_arena.deinit();
|
||||
}
|
||||
this.freed = true;
|
||||
this.base.interpreter.allocator.destroy(this);
|
||||
}
|
||||
|
||||
@@ -47,6 +47,27 @@ pub const ShellErr = union(enum) {
|
||||
};
|
||||
}
|
||||
|
||||
/// Renders this error as a single `bun: ...\n` line.
///
/// The returned slice is freshly allocated with `bun.default_allocator` on
/// every call, so the caller is responsible for it. Allocation failure is
/// routed to `bun.outOfMemory()`.
pub fn fmt(this: @This()) []const u8 {
    switch (this) {
        .sys => |err| {
            return std.fmt.allocPrint(bun.default_allocator, "bun: {s}: {}\n", .{ err.message, err.path }) catch bun.outOfMemory();
        },
        .custom => |msg| {
            return std.fmt.allocPrint(bun.default_allocator, "bun: {s}\n", .{msg}) catch bun.outOfMemory();
        },
        .invalid_arguments => |args| {
            return std.fmt.allocPrint(bun.default_allocator, "bun: invalid arguments: {s}\n", .{args.val}) catch bun.outOfMemory();
        },
        .todo => |todo| {
            // Format the active `.todo` payload. Reading
            // `this.invalid_arguments.val` here would access a union field
            // that is not active in this arm.
            // NOTE(review): assumes the `.todo` payload is a byte slice like
            // `.custom` -- confirm against the union declaration.
            return std.fmt.allocPrint(bun.default_allocator, "bun: TODO: {s}\n", .{todo}) catch bun.outOfMemory();
        },
    }
}
|
||||
|
||||
pub fn throwJS(this: @This(), globalThis: *JSC.JSGlobalObject) void {
|
||||
switch (this) {
|
||||
.sys => {
|
||||
@@ -922,7 +943,7 @@ pub const Parser = struct {
|
||||
self.continue_from_subparser(&subparser);
|
||||
if (self.delimits(self.peek())) {
|
||||
_ = self.match(.Delimit);
|
||||
if (should_break) break;
|
||||
break;
|
||||
}
|
||||
},
|
||||
.Text => |txtrng| {
|
||||
@@ -1279,7 +1300,8 @@ pub const LexError = struct {
|
||||
/// Allocated with lexer arena
|
||||
msg: []const u8,
|
||||
};
|
||||
pub const LEX_JS_OBJREF_PREFIX = "$__bun_";
|
||||
pub const LEX_JS_OBJREF_PREFIX = "~__bun_";
|
||||
pub const LEX_JS_STRING_PREFIX = "~__bunstr_";
|
||||
|
||||
pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
const Chars = ShellCharIter(encoding);
|
||||
@@ -1300,6 +1322,10 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
in_subshell: ?SubShellKind = null,
|
||||
errors: std.ArrayList(LexError),
|
||||
|
||||
/// Contains a list of strings we need to escape
|
||||
/// Not owned by this struct
|
||||
string_refs: []bun.String,
|
||||
|
||||
const SubShellKind = enum {
|
||||
/// (echo hi; echo hello)
|
||||
normal,
|
||||
@@ -1329,12 +1355,13 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
delimit_quote: bool,
|
||||
};
|
||||
|
||||
pub fn new(alloc: Allocator, src: []const u8) @This() {
|
||||
pub fn new(alloc: Allocator, src: []const u8, strings_to_escape: []bun.String) @This() {
|
||||
return .{
|
||||
.chars = Chars.init(src),
|
||||
.tokens = ArrayList(Token).init(alloc),
|
||||
.strpool = ArrayList(u8).init(alloc),
|
||||
.errors = ArrayList(LexError).init(alloc),
|
||||
.string_refs = strings_to_escape,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1364,6 +1391,7 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
|
||||
.word_start = self.word_start,
|
||||
.j = self.j,
|
||||
.string_refs = self.string_refs,
|
||||
};
|
||||
sublexer.chars.state = .Normal;
|
||||
return sublexer;
|
||||
@@ -1411,11 +1439,31 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
const char = input.char;
|
||||
const escaped = input.escaped;
|
||||
|
||||
// Special token to denote substituted JS variables
|
||||
if (char == '~') {
|
||||
if (self.looksLikeJSStringRef()) {
|
||||
if (self.eatJSStringRef()) |bunstr| {
|
||||
try self.break_word(false);
|
||||
try self.handleJSStringRef(bunstr);
|
||||
continue;
|
||||
}
|
||||
} else if (self.looksLikeJSObjRef()) {
|
||||
if (self.eatJSObjRef()) |tok| {
|
||||
if (self.chars.state == .Double) {
|
||||
self.add_error("JS object reference not allowed in double quotes");
|
||||
return;
|
||||
}
|
||||
try self.break_word(false);
|
||||
try self.tokens.append(tok);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle non-escaped chars:
|
||||
// 1. special syntax (operators, etc.)
|
||||
// 2. lexing state switchers (quotes)
|
||||
// 3. word breakers (spaces, etc.)
|
||||
if (!escaped) escaped: {
|
||||
else if (!escaped) escaped: {
|
||||
switch (char) {
|
||||
'#' => {
|
||||
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
|
||||
@@ -1506,21 +1554,13 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
// const snapshot = self.make_snapshot();
|
||||
// Handle variable
|
||||
try self.break_word(false);
|
||||
if (self.eat_js_obj_ref()) |ref| {
|
||||
if (self.chars.state == .Double) {
|
||||
try self.errors.append(.{ .msg = bun.default_allocator.dupe(u8, "JS object reference not allowed in double quotes") catch bun.outOfMemory() });
|
||||
return;
|
||||
}
|
||||
try self.tokens.append(ref);
|
||||
const var_tok = try self.eat_var();
|
||||
// empty var
|
||||
if (var_tok.start == var_tok.end) {
|
||||
try self.appendCharToStrPool('$');
|
||||
try self.break_word(false);
|
||||
} else {
|
||||
const var_tok = try self.eat_var();
|
||||
// empty var
|
||||
if (var_tok.start == var_tok.end) {
|
||||
try self.appendCharToStrPool('$');
|
||||
try self.break_word(false);
|
||||
} else {
|
||||
try self.tokens.append(.{ .Var = var_tok });
|
||||
}
|
||||
try self.tokens.append(.{ .Var = var_tok });
|
||||
}
|
||||
self.word_start = self.j;
|
||||
continue;
|
||||
@@ -1778,6 +1818,9 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
switch (char) {
|
||||
'0'...'9' => {
|
||||
_ = self.eat();
|
||||
if (count >= 32) {
|
||||
return null;
|
||||
}
|
||||
buf[count] = @intCast(char);
|
||||
count += 1;
|
||||
continue;
|
||||
@@ -1863,6 +1906,7 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
const char = result.char;
|
||||
switch (char) {
|
||||
'0'...'9' => {
|
||||
if (count >= 32) return null;
|
||||
// Safe to cast here because 0-8 is in ASCII range
|
||||
buf[count] = @intCast(char);
|
||||
count += 1;
|
||||
@@ -1907,19 +1951,146 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
|
||||
self.continue_from_sublexer(&sublexer);
|
||||
}
|
||||
|
||||
fn eat_js_obj_ref(self: *@This()) ?Token {
|
||||
const snap = self.make_snapshot();
|
||||
if (self.eat_literal(u8, LEX_JS_OBJREF_PREFIX)) {
|
||||
if (self.eat_number_word()) |num| {
|
||||
if (num <= std.math.maxInt(u32)) {
|
||||
return .{ .JSObjRef = @intCast(num) };
|
||||
/// Appends the contents of `bunstr` to the lexer's string pool and advances
/// `self.j` by the number of bytes that were added.
///
/// 8-bit and UTF-8 strings are copied byte-for-byte; anything else is
/// treated as UTF-16 and transcoded to UTF-8 while appending.
/// NOTE(review): 8-bit strings are copied verbatim -- presumably callers only
/// pass ASCII-safe 8-bit data here; confirm for non-ASCII Latin-1 input.
fn appendStringToStrPool(self: *@This(), bunstr: bun.String) !void {
    const len_before = self.strpool.items.len;

    if (!(bunstr.is8Bit() or bunstr.isUTF8())) {
        const wide = bunstr.utf16();
        // Reserve the exact UTF-8 size up front so the append does not regrow.
        const needed = bun.simdutf.simdutf__utf8_length_from_utf16le(wide.ptr, wide.len);
        try self.strpool.ensureUnusedCapacity(needed);
        try bun.strings.convertUTF16ToUTF8Append(&self.strpool, wide);
    } else {
        try self.strpool.appendSlice(bunstr.byteSlice());
    }

    self.j += @intCast(self.strpool.items.len - len_before);
}
|
||||
|
||||
/// Handles a substituted JS string by copying its contents into the string
/// pool; any append error is propagated to the caller.
fn handleJSStringRef(self: *@This(), bunstr: bun.String) !void {
    return self.appendStringToStrPool(bunstr);
}
|
||||
|
||||
/// Reports whether the unread source at the cursor starts with
/// `LEX_JS_OBJREF_PREFIX` minus its first byte and has at least one more
/// byte after it. Only peeks at the source; the cursor is not moved.
fn looksLikeJSObjRef(self: *@This()) bool {
    const tail = LEX_JS_OBJREF_PREFIX[1..];
    const rest = self.chars.srcBytesAtCursor();
    return rest.len > tail.len and std.mem.startsWith(u8, rest, tail);
}
|
||||
|
||||
/// Reports whether the unread source at the cursor starts with
/// `LEX_JS_STRING_PREFIX` minus its first byte and has at least one more
/// byte after it. Only peeks at the source; the cursor is not moved.
fn looksLikeJSStringRef(self: *@This()) bool {
    const tail = LEX_JS_STRING_PREFIX[1..];
    const rest = self.chars.srcBytesAtCursor();
    return rest.len > tail.len and std.mem.startsWith(u8, rest, tail);
}
|
||||
|
||||
/// Tries to consume a JS substitution marker at the cursor and returns the
/// decimal index that follows it.
///
/// - `literal`: the full marker prefix. Its first byte is skipped when
///   matching, so the comparison is done against `literal[1..]`.
/// - `name`: human-readable marker name spliced into error messages.
/// - `validate`: extra check applied to the parsed index; when it returns
///   false this function returns null without moving the cursor.
///
/// Returns null (and may record a lexer error via `add_error`) when the
/// prefix does not match, no digits follow, the index has more than 32
/// digits, the digits do not fit in `usize`, or `validate` rejects the index.
/// On success the character iterator is moved past the marker and its digits.
fn eatJSSubstitutionIdx(self: *@This(), comptime literal: []const u8, comptime name: []const u8, comptime validate: *const fn (*@This(), usize) bool) ?usize {
    const bytes = self.chars.srcBytesAtCursor();
    if (literal.len - 1 >= bytes.len) return null;
    // Nothing has been consumed at this point, so a prefix mismatch needs no rewind.
    if (!std.mem.eql(u8, bytes[0 .. literal.len - 1], literal[1..])) return null;

    var i: usize = literal.len - 1;
    var digit_buf: [32]u8 = undefined;
    var digit_buf_count: u8 = 0;

    // Collect the run of decimal digits that follows the prefix.
    while (i < bytes.len) : (i += 1) {
        switch (bytes[i]) {
            '0'...'9' => {
                if (digit_buf_count >= digit_buf.len) {
                    const ERROR_STR = "Invalid " ++ name ++ " (number too high): ";
                    // Sized for the "{s} {s}{c}" format below: the prefix, the
                    // separating space, every buffered digit, and the digit
                    // that overflowed. One byte less makes bufPrint fail and
                    // turns this error path into a panic.
                    var error_buf: [ERROR_STR.len + 1 + digit_buf.len + 1]u8 = undefined;
                    const error_msg = std.fmt.bufPrint(error_buf[0..], "{s} {s}{c}", .{ ERROR_STR, digit_buf[0..digit_buf_count], bytes[i] }) catch @panic("Should not happen");
                    // NOTE(review): error_msg points into a stack buffer; this
                    // assumes add_error copies the message -- confirm.
                    self.add_error(error_msg);
                    return null;
                }
                digit_buf[digit_buf_count] = bytes[i];
                digit_buf_count += 1;
            },
            else => break,
        }
    }

    if (digit_buf_count == 0) {
        self.add_error("Invalid " ++ name ++ " (no idx)");
        return null;
    }

    const idx = std.fmt.parseInt(usize, digit_buf[0..digit_buf_count], 10) catch {
        self.add_error("Invalid " ++ name ++ " ref ");
        return null;
    };

    if (!validate(self, idx)) return null;

    // Bump the cursor past the marker and its digits, and make the iterator's
    // prev/current characters reflect the last digits that were consumed.
    brk: {
        const new_idx = self.chars.cursorPos() + i;
        const prev_ascii_char: ?u7 = if (digit_buf_count == 1) null else @truncate(digit_buf[digit_buf_count - 2]);
        const cur_ascii_char: u7 = @truncate(digit_buf[digit_buf_count - 1]);
        if (comptime encoding == .ascii) {
            self.chars.src.i = new_idx;
            if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
            self.chars.current = .{ .char = cur_ascii_char };
            break :brk;
        }
        // Non-ASCII source: rebuild the current cursor, then derive the
        // lookahead cursor from it.
        self.chars.src.cursor = CodepointIterator.Cursor{
            .i = @intCast(new_idx),
            .c = cur_ascii_char,
            .width = 1,
        };
        self.chars.src.next_cursor = self.chars.src.cursor;
        SrcUnicode.nextCursor(&self.chars.src.iter, &self.chars.src.next_cursor);
        if (prev_ascii_char) |pc| self.chars.prev = .{ .char = pc };
        self.chars.current = .{ .char = cur_ascii_char };
    }

    return idx;
}
|
||||
|
||||
/// Consumes a JS string-ref marker at the cursor and returns the matching
/// entry of `self.string_refs`, or null when no valid marker is present.
///
/// __NOTE__: Do not store references to the returned bun.String, it does not have its ref count incremented
fn eatJSStringRef(self: *@This()) ?bun.String {
    const idx = self.eatJSSubstitutionIdx(
        LEX_JS_STRING_PREFIX,
        "JS string ref",
        validateJSStringRefIdx,
    ) orelse return null;
    return self.string_refs[idx];
}
|
||||
|
||||
/// Checks that `idx` addresses an entry of `self.string_refs`.
///
/// Returns true when the index is in range; otherwise records a lexer error
/// and returns false.
fn validateJSStringRefIdx(self: *@This(), idx: usize) bool {
    if (idx < self.string_refs.len) return true;
    // Message now closes its parenthesis, matching the object-ref variant.
    self.add_error("Invalid JS string ref (out of bounds)");
    return false;
}
|
||||
|
||||
/// Consumes a JS object-ref marker at the cursor and wraps its index in a
/// `.JSObjRef` token; returns null when no valid marker is present.
fn eatJSObjRef(self: *@This()) ?Token {
    const idx = self.eatJSSubstitutionIdx(
        LEX_JS_OBJREF_PREFIX,
        "JS object ref",
        validateJSObjRefIdx,
    ) orelse return null;
    return .{ .JSObjRef = @intCast(idx) };
}
|
||||
|
||||
/// Checks that `idx` can be represented as a `u32`.
///
/// Returns true when it fits; otherwise records a lexer error and returns
/// false. `maxInt(u32)` itself is accepted because it still fits the cast to
/// `u32`.
/// NOTE(review): presumably the `.JSObjRef` token payload is a `u32` --
/// confirm against the `Token` declaration.
fn validateJSObjRefIdx(self: *@This(), idx: usize) bool {
    if (idx > std.math.maxInt(u32)) {
        self.add_error("Invalid JS object ref (out of bounds)");
        return false;
    }
    return true;
}
|
||||
|
||||
fn eat_var(self: *@This()) !Token.TextRange {
|
||||
const start = self.j;
|
||||
var i: usize = 0;
|
||||
@@ -2087,7 +2258,7 @@ const SrcUnicode = struct {
|
||||
|
||||
inline fn indexNext(this: *const SrcUnicode) ?IndexValue {
|
||||
if (this.next_cursor.width + this.next_cursor.i > this.iter.bytes.len) return null;
|
||||
return .{ .char = this.next_cursor.c, .width = this.next_cursor.width };
|
||||
return .{ .char = @intCast(this.next_cursor.c), .width = this.next_cursor.width };
|
||||
}
|
||||
|
||||
inline fn eat(this: *SrcUnicode, escaped: bool) void {
|
||||
@@ -2147,6 +2318,27 @@ pub fn ShellCharIter(comptime encoding: StringEncoding) type {
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns the complete source buffer, picking the field that holds it for
/// the compile-time selected encoding.
pub fn srcBytes(self: *@This()) []const u8 {
    return if (comptime encoding == .ascii) self.src.bytes else self.src.iter.bytes;
}
|
||||
|
||||
/// Returns the source bytes from the current cursor position to the end, or
/// an empty slice when the cursor is at or past the end of the source.
pub fn srcBytesAtCursor(self: *@This()) []const u8 {
    const all = self.srcBytes();
    // The cursor index lives in a different field depending on the encoding.
    const pos = if (comptime encoding == .ascii) self.src.i else self.src.iter.i;
    return if (pos >= all.len) "" else all[pos..];
}
|
||||
|
||||
/// Returns the current byte offset of the cursor within the source, read
/// from the field used by the compile-time selected encoding.
pub fn cursorPos(self: *@This()) usize {
    return if (comptime encoding == .ascii) self.src.i else self.src.iter.i;
}
|
||||
|
||||
pub fn eat(self: *@This()) ?InputChar {
|
||||
if (self.read_char()) |result| {
|
||||
self.prev = self.current;
|
||||
@@ -2451,8 +2643,10 @@ pub fn shellCmdFromJS(
|
||||
string_args: JSValue,
|
||||
template_args: []const JSValue,
|
||||
out_jsobjs: *std.ArrayList(JSValue),
|
||||
jsstrings: *std.ArrayList(bun.String),
|
||||
out_script: *std.ArrayList(u8),
|
||||
) !bool {
|
||||
var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
|
||||
var jsobjref_buf: [128]u8 = [_]u8{0} ** 128;
|
||||
|
||||
var string_iter = string_args.arrayIterator(globalThis);
|
||||
@@ -2460,7 +2654,7 @@ pub fn shellCmdFromJS(
|
||||
const last = string_iter.len -| 1;
|
||||
while (string_iter.next()) |js_value| {
|
||||
defer i += 1;
|
||||
if (!try appendJSValueStr(globalThis, js_value, out_script, false)) {
|
||||
if (!try builder.appendJSValueStr(js_value, false)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2468,7 +2662,7 @@ pub fn shellCmdFromJS(
|
||||
// try script.appendSlice(str.full());
|
||||
if (i < last) {
|
||||
const template_value = template_args[i];
|
||||
if (!(try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsobjref_buf[0..]))) return false;
|
||||
if (!(try handleTemplateValue(globalThis, template_value, out_jsobjs, out_script, jsstrings, jsobjref_buf[0..]))) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -2479,8 +2673,10 @@ pub fn handleTemplateValue(
|
||||
template_value: JSValue,
|
||||
out_jsobjs: *std.ArrayList(JSValue),
|
||||
out_script: *std.ArrayList(u8),
|
||||
jsstrings: *std.ArrayList(bun.String),
|
||||
jsobjref_buf: []u8,
|
||||
) !bool {
|
||||
var builder = ShellSrcBuilder.init(globalThis, out_script, jsstrings);
|
||||
if (!template_value.isEmpty()) {
|
||||
if (template_value.asArrayBuffer(globalThis)) |array_buffer| {
|
||||
_ = array_buffer;
|
||||
@@ -2497,7 +2693,7 @@ pub fn handleTemplateValue(
|
||||
if (store.data == .file) {
|
||||
if (store.data.file.pathlike == .path) {
|
||||
const path = store.data.file.pathlike.path.slice();
|
||||
if (!try appendUTF8Text(path, out_script, true)) {
|
||||
if (!try builder.appendUTF8(path, true)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2537,7 +2733,7 @@ pub fn handleTemplateValue(
|
||||
}
|
||||
|
||||
if (template_value.isString()) {
|
||||
if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
|
||||
if (!try builder.appendJSValueStr(template_value, true)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2549,10 +2745,10 @@ pub fn handleTemplateValue(
|
||||
const last = array.len -| 1;
|
||||
var i: u32 = 0;
|
||||
while (array.next()) |arr| : (i += 1) {
|
||||
if (!(try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsobjref_buf))) return false;
|
||||
if (!(try handleTemplateValue(globalThis, arr, out_jsobjs, out_script, jsstrings, jsobjref_buf))) return false;
|
||||
if (i < last) {
|
||||
const str = bun.String.init(" ");
|
||||
if (!try appendBunStr(str, out_script, false)) return false;
|
||||
const str = bun.String.static(" ");
|
||||
if (!try builder.appendBunStr(str, false)) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@@ -2562,7 +2758,7 @@ pub fn handleTemplateValue(
|
||||
if (template_value.getTruthy(globalThis, "raw")) |maybe_str| {
|
||||
const bunstr = maybe_str.toBunString(globalThis);
|
||||
defer bunstr.deref();
|
||||
if (!try appendBunStr(bunstr, out_script, false)) {
|
||||
if (!try builder.appendBunStr(bunstr, false)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2571,7 +2767,7 @@ pub fn handleTemplateValue(
|
||||
}
|
||||
|
||||
if (template_value.isPrimitive()) {
|
||||
if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
|
||||
if (!try builder.appendJSValueStr(template_value, true)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2579,7 +2775,7 @@ pub fn handleTemplateValue(
|
||||
}
|
||||
|
||||
if (template_value.implementsToString(globalThis)) {
|
||||
if (!try appendJSValueStr(globalThis, template_value, out_script, true)) {
|
||||
if (!try builder.appendJSValueStr(template_value, true)) {
|
||||
globalThis.throw("Shell script string contains invalid UTF-16", .{});
|
||||
return false;
|
||||
}
|
||||
@@ -2593,57 +2789,127 @@ pub fn handleTemplateValue(
|
||||
return true;
|
||||
}
|
||||
|
||||
/// This will disallow invalid surrogate pairs
|
||||
pub fn appendJSValueStr(globalThis: *JSC.JSGlobalObject, jsval: JSValue, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
|
||||
const bunstr = jsval.toBunString(globalThis);
|
||||
defer bunstr.deref();
|
||||
pub const ShellSrcBuilder = struct {
|
||||
globalThis: *JSC.JSGlobalObject,
|
||||
outbuf: *std.ArrayList(u8),
|
||||
jsstrs_to_escape: *std.ArrayList(bun.String),
|
||||
jsstr_ref_buf: [128]u8 = [_]u8{0} ** 128,
|
||||
|
||||
return try appendBunStr(bunstr, outbuf, allow_escape);
|
||||
}
|
||||
|
||||
pub fn appendUTF8Text(slice: []const u8, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
|
||||
if (!bun.simdutf.validate.utf8(slice)) {
|
||||
return false;
|
||||
/// Creates a builder that writes script text into `outbuf` and records
/// strings needing escaping in `jsstrs_to_escape`. Both lists are borrowed
/// through the given pointers; remaining fields keep their defaults.
pub fn init(
    globalThis: *JSC.JSGlobalObject,
    outbuf: *std.ArrayList(u8),
    jsstrs_to_escape: *std.ArrayList(bun.String),
) ShellSrcBuilder {
    return ShellSrcBuilder{
        .globalThis = globalThis,
        .outbuf = outbuf,
        .jsstrs_to_escape = jsstrs_to_escape,
    };
}
|
||||
|
||||
if (allow_escape and needsEscape(slice)) {
|
||||
try escape(slice, outbuf);
|
||||
} else {
|
||||
try outbuf.appendSlice(slice);
|
||||
/// Converts `jsval` to a `bun.String` and appends it via `appendBunStr`.
/// The temporary string is dereferenced before returning. Returns whatever
/// `appendBunStr` returns for the converted string.
pub fn appendJSValueStr(this: *ShellSrcBuilder, jsval: JSValue, comptime allow_escape: bool) !bool {
    const as_string = jsval.toBunString(this.globalThis);
    defer as_string.deref();
    return this.appendBunStr(as_string, allow_escape);
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
pub fn appendBunStr(bunstr: bun.String, outbuf: *std.ArrayList(u8), comptime allow_escape: bool) !bool {
|
||||
const str = bunstr.toUTF8WithoutRef(bun.default_allocator);
|
||||
defer str.deinit();
|
||||
|
||||
// TODO: toUTF8 already validates. We shouldn't have to do this twice!
|
||||
const is_ascii = str.isAllocated();
|
||||
if (!is_ascii and !bun.simdutf.validate.utf8(str.slice())) {
|
||||
return false;
|
||||
/// Appends `bunstr` to the script being built.
///
/// Returns false, without appending, when the string is UTF-16 or UTF-8 and
/// fails validation for that encoding. When `allow_escape` is set and the
/// string needs escaping, a string-ref marker is emitted instead of the raw
/// text. Otherwise the text is appended through the routine matching its
/// encoding (UTF-16, UTF-8/ASCII, or Latin-1). Returns true on success.
pub fn appendBunStr(
    this: *ShellSrcBuilder,
    bunstr: bun.String,
    comptime allow_escape: bool,
) !bool {
    // Reject malformed input before touching the output buffer.
    if (bunstr.isUTF16() and !bun.simdutf.validate.utf16le(bunstr.utf16())) return false;
    if (bunstr.isUTF8() and !bun.simdutf.validate.utf8(bunstr.byteSlice())) return false;

    if (allow_escape) {
        if (needsEscapeBunstr(bunstr)) {
            try this.appendJSStrRef(bunstr);
            return true;
        }
    }

    if (bunstr.isUTF16()) {
        try this.appendUTF16Impl(bunstr.utf16());
    } else if (bunstr.isUTF8() or bun.strings.isAllASCII(bunstr.byteSlice())) {
        try this.appendUTF8Impl(bunstr.byteSlice());
    } else {
        try this.appendLatin1Impl(bunstr.byteSlice());
    }
    return true;
}
|
||||
|
||||
if (allow_escape and needsEscape(str.slice())) {
|
||||
try escape(str.slice(), outbuf);
|
||||
} else {
|
||||
try outbuf.appendSlice(str.slice());
|
||||
/// Appends a UTF-8 slice to the script being built.
///
/// Returns false, without appending, when `utf8` fails UTF-8 validation.
/// When `allow_escape` is set and the text needs escaping, a temporary
/// `bun.String` is created from it and emitted as a string-ref marker;
/// otherwise the bytes are appended as-is. Returns true on success.
pub fn appendUTF8(this: *ShellSrcBuilder, utf8: []const u8, comptime allow_escape: bool) !bool {
    if (!bun.simdutf.validate.utf8(utf8)) return false;

    if (allow_escape) {
        if (needsEscapeUtf8AsciiLatin1(utf8)) {
            const temp_str = bun.String.createUTF8(utf8);
            defer temp_str.deref();
            try this.appendJSStrRef(temp_str);
            return true;
        }
    }

    try this.appendUTF8Impl(utf8);
    return true;
}
|
||||
|
||||
return true;
|
||||
}
|
||||
/// Transcodes `utf16` to UTF-8 and appends it to the output buffer,
/// reserving the computed UTF-8 length before converting.
pub fn appendUTF16Impl(this: *ShellSrcBuilder, utf16: []const u16) !void {
    const utf8_len = bun.simdutf.simdutf__utf8_length_from_utf16le(utf16.ptr, utf16.len);
    try this.outbuf.ensureUnusedCapacity(utf8_len);
    return bun.strings.convertUTF16ToUTF8Append(this.outbuf, utf16);
}
|
||||
|
||||
/// Appends `utf8` to the output buffer verbatim; no validation or escaping
/// is performed here.
pub fn appendUTF8Impl(this: *ShellSrcBuilder, utf8: []const u8) !void {
    return this.outbuf.appendSlice(utf8);
}
|
||||
|
||||
/// Appends a Latin-1 encoded slice to the output buffer as UTF-8.
///
/// The leading ASCII run (identical in Latin-1 and UTF-8) is copied directly;
/// only the remainder, starting at the first non-ASCII byte, is handed to the
/// Latin-1 -> UTF-8 converter. If no non-ASCII byte is found, the whole slice
/// goes through the converter.
pub fn appendLatin1Impl(this: *ShellSrcBuilder, latin1: []const u8) !void {
    const non_ascii_idx = bun.strings.firstNonASCII(latin1) orelse 0;

    if (non_ascii_idx > 0) {
        try this.appendUTF8Impl(latin1[0..non_ascii_idx]);
    }

    // Convert only what has not been appended yet. Passing the full `latin1`
    // slice here would emit the ASCII prefix a second time.
    // NOTE(review): assumes the helper writes its output starting at the given
    // list offset (here: the current end of the buffer) — confirm against
    // `bun.strings.allocateLatin1IntoUTF8WithList`.
    this.outbuf.* = try bun.strings.allocateLatin1IntoUTF8WithList(
        this.outbuf.*,
        this.outbuf.items.len,
        []const u8,
        latin1[non_ascii_idx..],
    );
}
|
||||
|
||||
/// Emits a placeholder for `bunstr` into the output buffer and records the
/// string in `jsstrs_to_escape`.
///
/// The placeholder is `LEX_JS_STRING_PREFIX` followed by the index the string
/// will occupy in `jsstrs_to_escape`. On success the list holds one additional
/// reference to `bunstr`; on failure no reference is retained.
pub fn appendJSStrRef(this: *ShellSrcBuilder, bunstr: bun.String) !void {
    const idx = this.jsstrs_to_escape.items.len;
    const placeholder = std.fmt.bufPrint(this.jsstr_ref_buf[0..], "{s}{d}", .{ LEX_JS_STRING_PREFIX, idx }) catch {
        @panic("Impossible");
    };
    try this.outbuf.appendSlice(placeholder);

    // Take the reference only after the list has accepted the string, so a
    // failed append cannot leak a ref that nothing will ever release.
    try this.jsstrs_to_escape.append(bunstr);
    bunstr.ref();
}
|
||||
};
|
||||
|
||||
/// Characters that need to be escaped
|
||||
const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ' };
|
||||
const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ', '\'' };
|
||||
/// Characters that need to be backslashed inside double quotes
|
||||
const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };
|
||||
|
||||
/// assumes WTF-8
|
||||
pub fn escape(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
|
||||
/// Escapes `bunstr` into `outbuf`, dispatching on the string's encoding.
///
/// 8-bit strings go through `escape8Bit`, UTF-16 strings through
/// `escapeUtf16`, and everything else through `escapeWTF8`. Returns whatever
/// `escapeUtf16` reports for UTF-16 input, and `true` for the other encodings.
/// `add_quotes` is forwarded unchanged to the selected routine.
pub fn escapeBunStr(bunstr: bun.String, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !bool {
    if (!bunstr.is8Bit()) {
        if (bunstr.isUTF16()) return escapeUtf16(bunstr.utf16(), outbuf, add_quotes);

        // Neither 8-bit nor UTF-16: treat as UTF-8.
        try escapeWTF8(bunstr.byteSlice(), outbuf, add_quotes);
        return true;
    }

    // Latin-1 or ASCII.
    try escape8Bit(bunstr.byteSlice(), outbuf, add_quotes);
    return true;
}
|
||||
|
||||
/// works for latin-1 and ascii
|
||||
pub fn escape8Bit(str: []const u8, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !void {
|
||||
try outbuf.ensureUnusedCapacity(str.len);
|
||||
|
||||
try outbuf.append('\"');
|
||||
if (add_quotes) try outbuf.append('\"');
|
||||
|
||||
loop: for (str) |c| {
|
||||
inline for (BACKSLASHABLE_CHARS) |spc| {
|
||||
@@ -2658,15 +2924,15 @@ pub fn escape(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
|
||||
try outbuf.append(c);
|
||||
}
|
||||
|
||||
try outbuf.append('\"');
|
||||
if (add_quotes) try outbuf.append('\"');
|
||||
}
|
||||
|
||||
pub fn escapeUnicode(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
|
||||
pub fn escapeWTF8(str: []const u8, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !void {
|
||||
try outbuf.ensureUnusedCapacity(str.len);
|
||||
|
||||
var bytes: [8]u8 = undefined;
|
||||
var n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
|
||||
try outbuf.appendSlice(bytes[0..n]);
|
||||
var n: u3 = if (add_quotes) bun.strings.encodeWTF8Rune(bytes[0..4], '"') else 0;
|
||||
if (add_quotes) try outbuf.appendSlice(bytes[0..n]);
|
||||
|
||||
loop: for (str) |c| {
|
||||
inline for (BACKSLASHABLE_CHARS) |spc| {
|
||||
@@ -2686,18 +2952,84 @@ pub fn escapeUnicode(str: []const u8, outbuf: *std.ArrayList(u8)) !void {
|
||||
try outbuf.appendSlice(bytes[0..n]);
|
||||
}
|
||||
|
||||
n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
|
||||
try outbuf.appendSlice(bytes[0..n]);
|
||||
if (add_quotes) {
|
||||
n = bun.strings.encodeWTF8Rune(bytes[0..4], '"');
|
||||
try outbuf.appendSlice(bytes[0..n]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Escapes a UTF-16 string into `outbuf` as WTF-8, prefixing every character
/// listed in `BACKSLASHABLE_CHARS` with a backslash.
///
/// When `add_quotes` is set the output is wrapped in double quotes.
///
/// Returns `false` if decoding a code point fails; in that case `outbuf` may
/// already contain the opening quote and the characters processed so far.
/// Returns `true` on success.
pub fn escapeUtf16(str: []const u16, outbuf: *std.ArrayList(u8), comptime add_quotes: bool) !bool {
    if (add_quotes) try outbuf.append('"');

    // Code units before this index are ASCII and can be read directly without
    // surrogate-pair decoding.
    const non_ascii = bun.strings.firstNonASCII16([]const u16, str) orelse 0;
    var cp_buf: [4]u8 = undefined;

    var i: usize = 0;
    loop: while (i < str.len) {
        const char: u32 = brk: {
            if (i < non_ascii) {
                // Read the current unit *before* advancing. Incrementing first
                // would skip str[i] and read one unit too far.
                const unit = str[i];
                i += 1;
                break :brk unit;
            }
            const ret = bun.strings.utf16Codepoint([]const u16, str[i..]);
            if (ret.fail) return false;
            i += ret.len;
            break :brk ret.code_point;
        };

        inline for (BACKSLASHABLE_CHARS) |bchar| {
            if (@as(u32, @intCast(bchar)) == char) {
                try outbuf.appendSlice(&[_]u8{ '\\', @intCast(char) });
                continue :loop;
            }
        }

        const len = bun.strings.encodeWTF8RuneT(&cp_buf, u32, char);
        try outbuf.appendSlice(cp_buf[0..len]);
    }
    if (add_quotes) try outbuf.append('"');
    return true;
}
|
||||
|
||||
/// Reports whether `bunstr` contains characters that may require escaping,
/// using the UTF-16 scanner for UTF-16 strings and the byte scanner for
/// everything else (UTF-8, ASCII, or Latin-1).
pub fn needsEscapeBunstr(bunstr: bun.String) bool {
    return if (bunstr.isUTF16())
        needsEscapeUTF16(bunstr.utf16())
    else
        needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
}
|
||||
|
||||
/// Scalar scan over UTF-16 code units: returns `true` as soon as any unit
/// equals one of the entries in `SPECIAL_CHARS`, `false` if none does.
pub fn needsEscapeUTF16Slow(str: []const u16) bool {
    for (str) |unit| {
        // Unrolled at comptime: one comparison per special character.
        inline for (SPECIAL_CHARS) |special| {
            if (unit == @as(u16, @intCast(special))) return true;
        }
    }
    return false;
}
|
||||
|
||||
pub fn needsEscapeUTF16(str: []const u16) bool {
|
||||
for (str) |char| {
|
||||
switch (char) {
|
||||
'$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ' => return true,
|
||||
else => {},
|
||||
if (str.len < 64) return needsEscapeUTF16Slow(str);
|
||||
|
||||
const needles = comptime brk: {
|
||||
var needles: [SPECIAL_CHARS.len]@Vector(8, u16) = undefined;
|
||||
for (SPECIAL_CHARS, 0..) |c, i| {
|
||||
needles[i] = @splat(@as(u16, @intCast(c)));
|
||||
}
|
||||
break :brk needles;
|
||||
};
|
||||
|
||||
var i: usize = 0;
|
||||
while (i + 8 <= str.len) : (i += 8) {
|
||||
const haystack: @Vector(8, u16) = str[i..][0..8].*;
|
||||
|
||||
inline for (needles) |needle| {
|
||||
const result = haystack == needle;
|
||||
if (std.simd.firstTrue(result) != null) return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < str.len) return needsEscapeUTF16Slow(str[i..]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2705,8 +3037,8 @@ pub fn needsEscapeUTF16(str: []const u16) bool {
|
||||
/// indicates the *possibility* that the string must be escaped, so it can have
|
||||
/// false positives, but it is faster than running the shell lexer through the
|
||||
/// input string for a more correct implementation.
|
||||
pub fn needsEscape(str: []const u8) bool {
|
||||
if (str.len < 128) return needsEscapeSlow(str);
|
||||
pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
|
||||
if (str.len < 128) return needsEscapeUtf8AsciiLatin1Slow(str);
|
||||
|
||||
const needles = comptime brk: {
|
||||
var needles: [SPECIAL_CHARS.len]@Vector(16, u8) = undefined;
|
||||
@@ -2726,12 +3058,12 @@ pub fn needsEscape(str: []const u8) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (i < str.len) return needsEscapeSlow(str[i..]);
|
||||
if (i < str.len) return needsEscapeUtf8AsciiLatin1Slow(str[i..]);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn needsEscapeSlow(str: []const u8) bool {
|
||||
pub fn needsEscapeUtf8AsciiLatin1Slow(str: []const u8) bool {
|
||||
for (str) |c| {
|
||||
inline for (SPECIAL_CHARS) |spc| {
|
||||
if (spc == c) return true;
|
||||
|
||||
Reference in New Issue
Block a user