From 6b5de25d8a26207b5ee83efb65170beed31d936e Mon Sep 17 00:00:00 2001 From: robobun Date: Sat, 27 Dec 2025 17:25:52 -0800 Subject: [PATCH] feat(shell): add $.trace for analyzing shell commands without execution (#25667) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds `Bun.$.trace` for tracing shell commands without executing them. ```js const result = $.trace`cat /tmp/file.txt > output.txt`; // { operations: [...], cwd: "...", success: true, error: null } ``` ## Test plan - [x] `bun bd test test/js/bun/shell/trace.test.ts` 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Bot Co-authored-by: Claude Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Jarred Sumner --- src/bun.js/api/BunObject.zig | 2 + src/bun.js/bindings/BunObject+exports.h | 1 + src/bun.js/bindings/BunObject.cpp | 2 + src/js/builtins/shell.ts | 54 +- src/shell/TraceInterpreter.zig | 1252 +++++++++++++++++++++++ src/shell/braces.zig | 62 ++ src/shell/shell.zig | 1 + test/js/bun/shell/trace.test.ts | 417 ++++++++ 8 files changed, 1790 insertions(+), 1 deletion(-) create mode 100644 src/shell/TraceInterpreter.zig create mode 100644 test/js/bun/shell/trace.test.ts diff --git a/src/bun.js/api/BunObject.zig b/src/bun.js/api/BunObject.zig index 1ed2b9e244..831c4c5008 100644 --- a/src/bun.js/api/BunObject.zig +++ b/src/bun.js/api/BunObject.zig @@ -16,6 +16,7 @@ pub const BunObject = struct { pub const connect = toJSCallback(host_fn.wrapStaticMethod(api.Listener, "connect", false)); pub const createParsedShellScript = toJSCallback(bun.shell.ParsedShellScript.createParsedShellScript); pub const createShellInterpreter = toJSCallback(bun.shell.Interpreter.createShellInterpreter); + pub const traceShellScript = toJSCallback(bun.shell.TraceInterpreter.traceShellScript); pub const deflateSync = toJSCallback(JSZlib.deflateSync); pub const file = 
toJSCallback(WebCore.Blob.constructBunFile); pub const gunzipSync = toJSCallback(JSZlib.gunzipSync); @@ -154,6 +155,7 @@ pub const BunObject = struct { @export(&BunObject.connect, .{ .name = callbackName("connect") }); @export(&BunObject.createParsedShellScript, .{ .name = callbackName("createParsedShellScript") }); @export(&BunObject.createShellInterpreter, .{ .name = callbackName("createShellInterpreter") }); + @export(&BunObject.traceShellScript, .{ .name = callbackName("traceShellScript") }); @export(&BunObject.deflateSync, .{ .name = callbackName("deflateSync") }); @export(&BunObject.file, .{ .name = callbackName("file") }); @export(&BunObject.gunzipSync, .{ .name = callbackName("gunzipSync") }); diff --git a/src/bun.js/bindings/BunObject+exports.h b/src/bun.js/bindings/BunObject+exports.h index d92b41c0cd..b4481f7e47 100644 --- a/src/bun.js/bindings/BunObject+exports.h +++ b/src/bun.js/bindings/BunObject+exports.h @@ -69,6 +69,7 @@ macro(spawn) \ macro(spawnSync) \ macro(stringWidth) \ + macro(traceShellScript) \ macro(udpSocket) \ macro(which) \ macro(write) \ diff --git a/src/bun.js/bindings/BunObject.cpp b/src/bun.js/bindings/BunObject.cpp index 6296b0599f..c94784cf96 100644 --- a/src/bun.js/bindings/BunObject.cpp +++ b/src/bun.js/bindings/BunObject.cpp @@ -354,12 +354,14 @@ static JSValue constructBunShell(VM& vm, JSObject* bunObject) auto* globalObject = jsCast(bunObject->globalObject()); JSFunction* createParsedShellScript = JSFunction::create(vm, bunObject->globalObject(), 2, "createParsedShellScript"_s, BunObject_callback_createParsedShellScript, ImplementationVisibility::Private, NoIntrinsic); JSFunction* createShellInterpreterFunction = JSFunction::create(vm, bunObject->globalObject(), 1, "createShellInterpreter"_s, BunObject_callback_createShellInterpreter, ImplementationVisibility::Private, NoIntrinsic); + JSFunction* traceShellScriptFunction = JSFunction::create(vm, bunObject->globalObject(), 1, "traceShellScript"_s, 
BunObject_callback_traceShellScript, ImplementationVisibility::Private, NoIntrinsic); JSC::JSFunction* createShellFn = JSC::JSFunction::create(vm, globalObject, shellCreateBunShellTemplateFunctionCodeGenerator(vm), globalObject); auto scope = DECLARE_THROW_SCOPE(vm); auto args = JSC::MarkedArgumentBuffer(); args.append(createShellInterpreterFunction); args.append(createParsedShellScript); + args.append(traceShellScriptFunction); JSC::JSValue shell = JSC::call(globalObject, createShellFn, args, "BunShell"_s); RETURN_IF_EXCEPTION(scope, {}); diff --git a/src/js/builtins/shell.ts b/src/js/builtins/shell.ts index 40f3e7d3a0..142eecccc2 100644 --- a/src/js/builtins/shell.ts +++ b/src/js/builtins/shell.ts @@ -1,4 +1,35 @@ -export function createBunShellTemplateFunction(createShellInterpreter_, createParsedShellScript_) { +// Note: ShellTraceFlags interface documents the permission flag values returned +// by $.trace operations. These are intentionally not exported as runtime values +// to keep the trace API simple - users compare against numeric constants directly. +// The values mirror standard Unix open(2) and access(2) flags. + +interface ShellTraceOperation { + /** Permission flags (octal integer, can be combined with |) */ + flags: number; + /** Working directory at time of operation */ + cwd: string; + /** Absolute path that would be accessed (for file/execute operations) */ + path?: string; + /** Command name (for execute operations) */ + command?: string; + /** Accumulated environment variables at this point in execution */ + env?: Record; + /** Which standard stream is being redirected: "stdin", "stdout", or "stderr" */ + stream?: "stdin" | "stdout" | "stderr"; + /** Command arguments for external commands (excluding command name) */ + args?: string[]; + /** True if operation contains non-statically-analyzable values (command substitution, $1, etc.) 
*/ + dynamic?: true; +} + +interface ShellTraceResult { + operations: ShellTraceOperation[]; + cwd: string; + success: boolean; + error: string | null; +} + +export function createBunShellTemplateFunction(createShellInterpreter_, createParsedShellScript_, traceShellScript_) { const createShellInterpreter = createShellInterpreter_ as ( resolve: (code: number, stdout: Buffer, stderr: Buffer) => void, reject: (code: number, stdout: Buffer, stderr: Buffer) => void, @@ -8,6 +39,7 @@ export function createBunShellTemplateFunction(createShellInterpreter_, createPa raw: string, args: string[], ) => $ZigGeneratedClasses.ParsedShellScript; + const traceShellScript = traceShellScript_ as (args: $ZigGeneratedClasses.ParsedShellScript) => ShellTraceResult; function lazyBufferToHumanReadableString(this: Buffer) { return this.toString(); @@ -348,6 +380,22 @@ export function createBunShellTemplateFunction(createShellInterpreter_, createPa BunShell[envSymbol] = defaultEnv; BunShell[throwsSymbol] = true; + // Trace function - analyzes shell script without running it + function trace(first, ...rest): ShellTraceResult { + if (first?.raw === undefined) + throw new Error("Please use '$.trace' as a tagged template function: $.trace`cmd arg1 arg2`"); + const parsed_shell_script = createParsedShellScript(first.raw, rest); + + const cwd = BunShell[cwdSymbol]; + const env = BunShell[envSymbol]; + + // cwd must be set before env or else it will be injected into env as "PWD=/" + if (cwd) parsed_shell_script.setCwd(cwd); + if (env) parsed_shell_script.setEnv(env); + + return traceShellScript(parsed_shell_script); + } + Object.defineProperties(BunShell, { Shell: { value: Shell, @@ -361,6 +409,10 @@ export function createBunShellTemplateFunction(createShellInterpreter_, createPa value: ShellError, enumerable: true, }, + trace: { + value: trace, + enumerable: true, + }, }); return BunShell; diff --git a/src/shell/TraceInterpreter.zig b/src/shell/TraceInterpreter.zig new file mode 100644 index 
0000000000..275144a4a7
--- /dev/null
+++ b/src/shell/TraceInterpreter.zig
@@ -0,0 +1,1252 @@
+//! The trace interpreter simulates shell execution without actually running commands.
+//! It walks the AST and collects information about what permissions would be needed
+//! and what file paths would be accessed.
+//!
+//! This is used for a permission system where users can inspect what a shell command
+//! would do before actually executing it.
+
+/// Unix-style permission flags using standard octal values
+/// The O_* values follow the Linux open(2) numbering; the extended bits are specific to the tracer
+pub const Permission = struct {
+    /// Standard Unix permission bits (octal)
+    pub const O_RDONLY: u32 = 0o0; // Read only
+    pub const O_WRONLY: u32 = 0o1; // Write only
+    pub const O_RDWR: u32 = 0o2; // Read and write
+    pub const O_CREAT: u32 = 0o100; // Create file if it doesn't exist
+    pub const O_EXCL: u32 = 0o200; // Fail if file exists (with O_CREAT)
+    pub const O_TRUNC: u32 = 0o1000; // Truncate file to zero length
+    pub const O_APPEND: u32 = 0o2000; // Append to file
+
+    /// Extended operation flags (using higher bits to avoid conflicts)
+    pub const X_OK: u32 = 0o100000; // Execute permission / run command
+    pub const DELETE: u32 = 0o200000; // Delete file or directory
+    pub const MKDIR: u32 = 0o400000; // Create directory
+    pub const CHDIR: u32 = 0o1000000; // Change directory
+    pub const ENV: u32 = 0o2000000; // Modify environment
+
+    /// Convenience combinations (READ is O_RDONLY = 0, so it adds no bits when OR-ed with other flags)
+    pub const READ: u32 = O_RDONLY;
+    pub const WRITE: u32 = O_WRONLY;
+    pub const READ_WRITE: u32 = O_RDWR;
+    pub const CREATE: u32 = O_CREAT | O_WRONLY;
+    pub const CREATE_TRUNC: u32 = O_CREAT | O_TRUNC | O_WRONLY;
+    pub const APPEND: u32 = O_APPEND | O_WRONLY;
+    pub const EXECUTE: u32 = X_OK;
+};
+
+/// Standard stream identifiers for redirections
+pub const Stream = enum(u8) {
+    none = 0,
+    stdin = 1,
+    stdout = 2,
+    stderr = 3,
+
+    pub fn toJS(this: Stream, globalThis: *JSGlobalObject) JSValue {
+        return if (this == .none) .null else bun.String.static(@tagName(this)).toJS(globalThis);
+    }
+};
+
+/// A snapshot of environment variables at a point in execution
+pub const EnvSnapshot = struct {
+    /// Map of variable name -> value
+    vars: bun.StringHashMapUnmanaged([]const u8),
+    /// Allocator used for this snapshot
+    allocator: Allocator,
+
+    pub fn init(allocator: Allocator) EnvSnapshot {
+        return .{
+            .vars = .{},
+            .allocator = allocator,
+        };
+    }
+
+    pub fn clone(this: *const EnvSnapshot, allocator: Allocator) EnvSnapshot {
+        var new_vars: bun.StringHashMapUnmanaged([]const u8) = .{};
+        var iter = this.vars.iterator();
+        while (iter.next()) |entry| {
+            const key_copy = allocator.dupe(u8, entry.key_ptr.*) catch continue; // best effort: an entry that cannot be copied is skipped
+            const val_copy = allocator.dupe(u8, entry.value_ptr.*) catch {
+                allocator.free(key_copy);
+                continue;
+            };
+            new_vars.put(allocator, key_copy, val_copy) catch {
+                allocator.free(key_copy);
+                allocator.free(val_copy);
+                continue;
+            };
+        }
+        return .{
+            .vars = new_vars,
+            .allocator = allocator,
+        };
+    }
+
+    pub fn deinit(this: *EnvSnapshot) void {
+        var iter = this.vars.iterator();
+        while (iter.next()) |entry| {
+            this.allocator.free(entry.key_ptr.*);
+            this.allocator.free(entry.value_ptr.*);
+        }
+        this.vars.deinit(this.allocator);
+    }
+
+    pub fn toJS(this: *const EnvSnapshot, globalThis: *JSGlobalObject) JSValue {
+        var obj = jsc.JSValue.createEmptyObject(globalThis, @intCast(this.vars.count()));
+        var iter = this.vars.iterator();
+        while (iter.next()) |entry| {
+            obj.put(
+                globalThis,
+                bun.String.init(entry.key_ptr.*),
+                bun.String.init(entry.value_ptr.*).toJS(globalThis),
+            );
+        }
+        return obj;
+    }
+};
+
+/// Represents a single traced operation
+pub const TracedOperation = struct {
+    /// The permission flags required (bitwise OR of the Permission constants)
+    flags: u32,
+    /// Absolute path that would be accessed (null for non-path operations)
+    path: ?[]const u8,
+    /// The command name (for execute operations)
+    command: ?[]const u8,
+    /// Working directory at time of operation
+    cwd: []const u8,
+    /// Snapshot of environment variables at this point
+    env: EnvSnapshot,
+    /// Which standard stream is being redirected (if any)
+    stream: Stream,
+    /// Command arguments (for execute operations, excluding the command name itself)
+    args: ?[]const []const u8,
+    /// Whether this operation contains dynamic/non-statically-analyzable values
+    dynamic: bool,
+
+    pub fn deinit(this: *TracedOperation, allocator: Allocator) void {
+        if (this.path) |p| allocator.free(p);
+        if (this.command) |c| allocator.free(c);
+        allocator.free(this.cwd);
+        this.env.deinit();
+        if (this.args) |args| {
+            for (args) |arg| allocator.free(arg);
+            allocator.free(args);
+        }
+    }
+
+    pub fn toJS(this: *const TracedOperation, globalThis: *JSGlobalObject) bun.JSError!JSValue {
+        var obj = jsc.JSValue.createEmptyObject(globalThis, 8); // capacity hint: flags, cwd, path, command, env, stream, args, dynamic
+
+        // Return flags as integer (octal value)
+        obj.put(
+            globalThis,
+            bun.String.static("flags"),
+            jsc.JSValue.jsNumber(@as(i32, @intCast(this.flags))),
+        );
+
+        // cwd is always present
+        obj.put(
+            globalThis,
+            bun.String.static("cwd"),
+            bun.String.init(this.cwd).toJS(globalThis),
+        );
+
+        // Only set optional properties if they have values (otherwise undefined)
+        if (this.path) |p| {
+            obj.put(
+                globalThis,
+                bun.String.static("path"),
+                bun.String.init(p).toJS(globalThis),
+            );
+        }
+
+        if (this.command) |c| {
+            obj.put(
+                globalThis,
+                bun.String.static("command"),
+                bun.String.init(c).toJS(globalThis),
+            );
+        }
+
+        // Environment snapshot - only include if there are env vars
+        if (this.env.vars.count() > 0) {
+            obj.put(
+                globalThis,
+                bun.String.static("env"),
+                this.env.toJS(globalThis),
+            );
+        }
+
+        // Stream redirection (stdin, stdout, stderr) - only set if not none
+        if (this.stream != .none) {
+            obj.put(
+                globalThis,
+                bun.String.static("stream"),
+                this.stream.toJS(globalThis),
+            );
+        }
+
+        // Command arguments (for execute operations)
+        if (this.args) |args| {
+            const arr = try jsc.JSValue.createEmptyArray(globalThis, args.len);
+            for (args, 0..) |arg, i| {
+                try arr.putIndex(globalThis, @intCast(i), bun.String.init(arg).toJS(globalThis));
+            }
+            obj.put(globalThis, bun.String.static("args"), arr);
+        }
+
+        // Dynamic flag - only set if true
+        if (this.dynamic) {
+            obj.put(
+                globalThis,
+                bun.String.static("dynamic"),
+                .true,
+            );
+        }
+
+        return obj;
+    }
+};
+
+/// Result of tracing a shell script
+pub const TraceResult = struct {
+    /// All traced operations
+    operations: std.array_list.Managed(TracedOperation),
+    /// The working directory
+    cwd: []const u8,
+    /// Whether tracing was successful
+    success: bool,
+    /// Error message if tracing failed
+    error_message: ?[]const u8,
+    /// Allocator used for this result
+    allocator: Allocator,
+
+    pub fn init(allocator: Allocator, cwd: []const u8) TraceResult {
+        return .{
+            .operations = std.array_list.Managed(TracedOperation).init(allocator),
+            .cwd = bun.handleOom(allocator.dupe(u8, cwd)),
+            .success = true,
+            .error_message = null,
+            .allocator = allocator,
+        };
+    }
+
+    pub fn deinit(this: *TraceResult) void {
+        for (this.operations.items) |*op| {
+            op.deinit(this.allocator);
+        }
+        this.operations.deinit();
+        this.allocator.free(this.cwd);
+        if (this.error_message) |msg| {
+            this.allocator.free(msg);
+        }
+    }
+
+    pub fn addOperation(this: *TraceResult, op: TracedOperation) void {
+        bun.handleOom(this.operations.append(op));
+    }
+
+    pub fn setError(this: *TraceResult, msg: []const u8) void {
+        this.success = false;
+        this.error_message = bun.handleOom(this.allocator.dupe(u8, msg)); // NOTE(review): a previously stored message is overwritten without being freed
+    }
+
+    pub fn toJS(this: *const TraceResult, globalThis: *JSGlobalObject) bun.JSError!JSValue {
+        var result_obj = jsc.JSValue.createEmptyObject(globalThis, 4);
+
+        // Create operations array
+        const ops_array = try jsc.JSValue.createEmptyArray(globalThis, this.operations.items.len);
+        for (this.operations.items, 0..) |*op, i| {
+            const op_js = try op.toJS(globalThis);
+            try ops_array.putIndex(globalThis, @intCast(i), op_js);
+        }
+        result_obj.put(globalThis, bun.String.static("operations"), ops_array);
+
+        // Add cwd
+        result_obj.put(
+            globalThis,
+            bun.String.static("cwd"),
+            bun.String.init(this.cwd).toJS(globalThis),
+        );
+
+        // Add success
+        result_obj.put(
+            globalThis,
+            bun.String.static("success"),
+            jsc.JSValue.jsBoolean(this.success),
+        );
+
+        // Add error if present
+        if (this.error_message) |msg| {
+            result_obj.put(
+                globalThis,
+                bun.String.static("error"),
+                bun.String.init(msg).toJS(globalThis),
+            );
+        } else {
+            result_obj.put(globalThis, bun.String.static("error"), .null);
+        }
+
+        return result_obj;
+    }
+};
+
+/// TraceContext holds state during trace interpretation.
+/// Note: The allocator is stored because it's needed throughout traversal for allocating
+/// strings, paths, etc. The result is a pointer because it's created before the context
+/// and operations are added to it during traversal.
+pub const TraceContext = struct {
+    /// Allocator used for all allocations during tracing
+    allocator: Allocator,
+    /// Output: traced operations are added here during traversal
+    result: *TraceResult,
+    /// Current working directory during trace (unmanaged, uses this.allocator)
+    cwd: std.ArrayListUnmanaged(u8),
+    /// Shell environment for variable expansion
+    shell_env: EnvMap,
+    /// Exported environment (for subprocess) - borrowed pointer, not owned (do not deinit)
+    export_env: ?*EnvMap,
+    /// Ownership marker for export_env; init always sets it to false and deinit never frees export_env
+    owns_export_env: bool,
+    /// Accumulated traced environment variables (snapshot for each operation)
+    traced_env: bun.StringHashMapUnmanaged([]const u8),
+    /// Whether the current operation has dynamic (non-statically-analyzable) values
+    current_dynamic: bool,
+    /// JS objects from template literal interpolation, indexed by position
+    jsobjs: []JSValue,
+    globalThis: *JSGlobalObject,
+
+    pub fn init(
+        allocator: Allocator,
+        result: *TraceResult,
+        cwd: []const u8,
+        export_env: ?*EnvMap,
+        jsobjs: []JSValue,
+        globalThis: *JSGlobalObject,
+    ) TraceContext {
+        var ctx = TraceContext{
+            .allocator = allocator,
+            .result = result,
+            .cwd = .{},
+            .shell_env = EnvMap.init(allocator),
+            .export_env = export_env,
+            .owns_export_env = false, // We borrow it, don't own it
+            .traced_env = .{},
+            .current_dynamic = false,
+            .jsobjs = jsobjs,
+            .globalThis = globalThis,
+        };
+        bun.handleOom(ctx.cwd.appendSlice(allocator, cwd));
+        return ctx;
+    }
+
+    pub fn deinit(this: *TraceContext) void {
+        this.cwd.deinit(this.allocator);
+        this.shell_env.deinit();
+        // export_env is borrowed, not owned - never free it
+        // Free the owned key/value copies held by traced_env
+        var iter = this.traced_env.iterator();
+        while (iter.next()) |entry| {
+            this.allocator.free(entry.key_ptr.*);
+            this.allocator.free(entry.value_ptr.*);
+        }
+        this.traced_env.deinit(this.allocator);
+    }
+
+    /// Set or overwrite a traced env variable; name and value are copied, and on allocation failure the map is left unchanged
+    pub fn setTracedEnv(this: *TraceContext, name: []const u8, value: []const u8) void {
+        // If key already exists, swap the value in place (the map keeps its owned key)
+        if (this.traced_env.getPtr(name)) |val_ptr| {
+            // Duplicate before freeing: a failed dupe must not leave a dangling value, and `value` may alias the old one
+            const new_val = this.allocator.dupe(u8, value) catch return;
+            this.allocator.free(val_ptr.*);
+            val_ptr.* = new_val;
+        } else {
+            // New key
+            const key_copy = this.allocator.dupe(u8, name) catch return;
+            const val_copy = this.allocator.dupe(u8, value) catch {
+                this.allocator.free(key_copy);
+                return;
+            };
+            this.traced_env.put(this.allocator, key_copy, val_copy) catch {
+                this.allocator.free(key_copy);
+                this.allocator.free(val_copy);
+                return;
+            };
+        }
+    }
+
+    /// Deep-copy the current traced environment; entries that cannot be allocated are skipped
+    pub fn snapshotEnv(this: *TraceContext) EnvSnapshot {
+        var snapshot = EnvSnapshot.init(this.allocator);
+        var iter = this.traced_env.iterator();
+        while (iter.next()) |entry| {
+            const key_copy = this.allocator.dupe(u8, entry.key_ptr.*) catch continue;
+            const val_copy = this.allocator.dupe(u8, entry.value_ptr.*) catch {
+                this.allocator.free(key_copy);
+                continue;
+            };
+            snapshot.vars.put(this.allocator, key_copy, val_copy) catch {
+                this.allocator.free(key_copy);
+                this.allocator.free(val_copy);
+                continue;
+            };
+        }
+        return snapshot;
+    }
+
+    pub fn cwdSlice(this: *const TraceContext) []const u8 {
+        return this.cwd.items;
+    }
+
+    pub fn resolvePath(this: *TraceContext, path: []const u8) []const u8 {
+        if (ResolvePath.Platform.auto.isAbsolute(path)) {
+            return bun.handleOom(this.allocator.dupe(u8, path));
+        }
+        // Relative path: join with the traced cwd and return an owned copy of the result
+        const parts: []const []const u8 = &.{ this.cwdSlice(), path };
+        const joined = ResolvePath.joinZ(parts, .auto);
+        return bun.handleOom(this.allocator.dupe(u8, joined[0..joined.len]));
+    }
+
+    pub fn addOperation(this: *TraceContext, flags: u32, path: ?[]const u8, command: ?[]const u8) void {
+        this.addOperationFull(flags, path, command, .none, null);
+    }
+
+    pub fn addOperationWithStream(this: *TraceContext, flags: u32, path: ?[]const u8, command: ?[]const u8, stream: Stream) void {
+        this.addOperationFull(flags, path, command, stream, null);
+    }
+
+    pub fn addOperationWithArgs(this: *TraceContext, flags: u32, path: ?[]const u8, command: ?[]const u8, args: ?[]const []const u8) void {
+        this.addOperationFull(flags, path, command, .none, args);
+    }
+
+    pub fn addOperationFull(this: *TraceContext, flags: u32, path: ?[]const u8, command: ?[]const u8, stream: Stream, args: ?[]const []const u8) void {
+        const resolved_path = if (path) |p| this.resolvePath(p) else null;
+
+        // Deep-copy args so the recorded operation owns them; on allocation failure args is recorded as null
+        const duped_args: ?[]const []const u8 = if (args) |a| blk: {
+            const arr = this.allocator.alloc([]const u8, a.len) catch break :blk null;
+            for (a, 0..) |arg, i| {
+                arr[i] = this.allocator.dupe(u8, arg) catch {
+                    // Free already allocated
+                    for (arr[0..i]) |prev| this.allocator.free(prev);
+                    this.allocator.free(arr);
+                    break :blk null;
+                };
+            }
+            break :blk arr;
+        } else null;
+
+        // Snapshot the current environment
+        const env_snapshot = this.snapshotEnv();
+
+        // Capture dynamic flag and reset it so it only applies to this operation
+        const is_dynamic = this.current_dynamic;
+        this.current_dynamic = false;
+
+        this.result.addOperation(.{
+            .flags = flags,
+            .path = resolved_path,
+            .command = if (command) |c| bun.handleOom(this.allocator.dupe(u8, c)) else null,
+            .cwd = bun.handleOom(this.allocator.dupe(u8, this.cwdSlice())),
+            .env = env_snapshot,
+            .stream = stream,
+            .args = duped_args,
+            .dynamic = is_dynamic,
+        });
+    }
+
+    pub fn getVar(this: *TraceContext, name: []const u8) ?[]const u8 {
+        const key = EnvStr.initSlice(name);
+        if (this.shell_env.get(key)) |v| {
+            return v.slice();
+        }
+        if (this.export_env) |env| {
+            if (env.get(key)) |v| {
+                return v.slice();
+            }
+        }
+        return null;
+    }
+
+    pub fn changeCwd(this: *TraceContext, new_cwd: []const u8) void {
+        // Just update the context's cwd - don't add an operation
+        // (the caller is responsible for adding the CHDIR operation if needed)
+        if (ResolvePath.Platform.auto.isAbsolute(new_cwd)) {
+            this.cwd.clearRetainingCapacity();
+            bun.handleOom(this.cwd.appendSlice(this.allocator, new_cwd));
+        } else {
+            // Join with current cwd and normalize (handles .. and .)
+            const parts: []const []const u8 = &.{ this.cwdSlice(), new_cwd };
+            const joined = ResolvePath.joinZ(parts, .auto);
+            this.cwd.clearRetainingCapacity();
+            bun.handleOom(this.cwd.appendSlice(this.allocator, joined[0..joined.len]));
+        }
+    }
+};
+
+// =============================================================================
+// AST Walking Functions
+// =============================================================================
+
+pub fn traceScript(ctx: *TraceContext, script: *const ast.Script) void {
+    for (script.stmts) |*stmt| {
+        traceStmt(ctx, stmt);
+    }
+}
+
+fn traceStmt(ctx: *TraceContext, stmt: *const ast.Stmt) void {
+    // Stmt is a struct with exprs field, not a union
+    for (stmt.exprs) |*expr| {
+        traceExpr(ctx, expr);
+    }
+}
+
+fn traceExpr(ctx: *TraceContext, expr: *const ast.Expr) void {
+    switch (expr.*) {
+        .cmd => |cmd| traceCmd(ctx, cmd),
+        .assign => |assigns| {
+            for (assigns) |*assign| {
+                traceAssign(ctx, assign);
+            }
+        },
+        .binary => |binary| traceBinary(ctx, binary),
+        .pipeline => |pipeline| tracePipeline(ctx, pipeline),
+        .subshell => |subshell| traceSubshell(ctx, &subshell.script),
+        .@"if" => |if_clause| traceIfClause(ctx, if_clause),
+        .condexpr => |condexpr| traceCondExpr(ctx, condexpr),
+        .async => |async_expr| traceExpr(ctx, async_expr),
+    }
+}
+
+fn traceSubshell(ctx: *TraceContext, script: *const ast.Script) void {
+    // Save current cwd - a cd inside the subshell must not affect the parent (traced_env is not restored here)
+    const saved_cwd = bun.handleOom(ctx.allocator.dupe(u8, ctx.cwdSlice()));
+    defer ctx.allocator.free(saved_cwd);
+
+    traceScript(ctx, script);
+
+    // Restore cwd after subshell
+    ctx.cwd.clearRetainingCapacity();
+    bun.handleOom(ctx.cwd.appendSlice(ctx.allocator, saved_cwd));
+}
+
+fn 
traceAssign(ctx: *TraceContext, assign: *const ast.Assign) void { + // Expand the value + const value = expandAtom(ctx, &assign.value); + defer ctx.allocator.free(value); + + // Set the env var in traced env + ctx.setTracedEnv(assign.label, value); + + // Add an ENV operation (the env snapshot will include this new var) + ctx.addOperation(Permission.ENV, null, null); +} + +fn traceBinary(ctx: *TraceContext, binary: *const ast.Binary) void { + traceExpr(ctx, &binary.left); + traceExpr(ctx, &binary.right); +} + +fn tracePipeline(ctx: *TraceContext, pipeline: *const ast.Pipeline) void { + for (pipeline.items) |*item| { + tracePipelineItem(ctx, item); + } +} + +fn tracePipelineItem(ctx: *TraceContext, item: *const ast.PipelineItem) void { + switch (item.*) { + .cmd => |cmd| traceCmd(ctx, cmd), + .assigns => |assigns| { + for (assigns) |*assign| { + traceAssign(ctx, assign); + } + }, + .subshell => |subshell| traceSubshell(ctx, &subshell.script), + .@"if" => |if_clause| traceIfClause(ctx, if_clause), + .condexpr => |condexpr| traceCondExpr(ctx, condexpr), + } +} + +fn traceIfClause(ctx: *TraceContext, if_clause: *const ast.If) void { + // Trace the condition statements + for (if_clause.cond.slice()) |*stmt| { + traceStmt(ctx, stmt); + } + // Trace the then branch statements + for (if_clause.then.slice()) |*stmt| { + traceStmt(ctx, stmt); + } + // Trace the else parts + // else_parts is a SmolList of SmolList(Stmt, 1) + // Length 0 = no else, length 1 = just else, length 2n = elif/then pairs, length 2n+1 = elif/then pairs + else + for (if_clause.else_parts.slice()) |*part| { + for (part.slice()) |*stmt| { + traceStmt(ctx, stmt); + } + } +} + +fn traceCondExpr(ctx: *TraceContext, cond: *const ast.CondExpr) void { + const op = cond.op; + // File test operators (single argument) + const is_file_test = op == .@"-e" or op == .@"-f" or op == .@"-d" or + op == .@"-r" or op == .@"-w" or op == .@"-x" or + op == .@"-s" or op == .@"-L" or op == .@"-h" or + op == .@"-b" or op == 
.@"-c" or op == .@"-g" or + op == .@"-k" or op == .@"-p" or op == .@"-u" or + op == .@"-O" or op == .@"-G" or op == .@"-S" or + op == .@"-a" or op == .@"-N"; + + // File comparison operators (two arguments) + const is_file_comparison = op == .@"-ef" or op == .@"-nt" or op == .@"-ot"; + + if (is_file_test or is_file_comparison) { + // Expand all arguments and add read operations for file paths + for (cond.args.slice()) |*arg| { + const path = expandAtom(ctx, arg); + if (path.len > 0) { + ctx.addOperation(Permission.READ, path, null); + } + ctx.allocator.free(path); + } + } +} + +/// Information about a command's redirections +const RedirectInfo = struct { + /// Path for stdin redirection (if any) + stdin_path: ?[]const u8 = null, + /// Path for stdout redirection (if any) + stdout_path: ?[]const u8 = null, + /// Flags for stdout redirection + stdout_flags: u32 = 0, + /// Path for stderr redirection (if any) + stderr_path: ?[]const u8 = null, + /// Flags for stderr redirection + stderr_flags: u32 = 0, +}; + +fn traceCmd(ctx: *TraceContext, cmd: *const ast.Cmd) void { + // First, trace any assignments + for (cmd.assigns) |*assign| { + traceAssign(ctx, assign); + } + + // Expand the command name and arguments + if (cmd.name_and_args.len == 0) { + return; + } + + const cmd_name = expandAtom(ctx, &cmd.name_and_args[0]); + defer ctx.allocator.free(cmd_name); + + if (cmd_name.len == 0) { + return; + } + + // Get redirection info first + const redir = getRedirectInfo(ctx, cmd); + defer { + if (redir.stdin_path) |p| ctx.allocator.free(p); + if (redir.stdout_path) |p| ctx.allocator.free(p); + if (redir.stderr_path) |p| ctx.allocator.free(p); + } + + // Check for known commands (builtins) and map them to permissions + // Use stringToEnum directly to recognize all known commands, even if they're + // disabled as builtins on this platform (e.g., cat/cp on POSIX) + if (std.meta.stringToEnum(Interpreter.Builtin.Kind, cmd_name)) |builtin_kind| { + traceBuiltin(ctx, builtin_kind, 
cmd, &redir);
+    } else {
+        // External command - needs execute permission
+        traceExternalCommand(ctx, cmd_name, cmd, &redir);
+    }
+}
+
+/// Expand command arguments and extract file paths (skipping flags).
+/// Returns a list of expanded file paths. Caller owns the returned memory
+/// (both the list and every string in it); release it with `freeFileArgs`.
+/// Handles brace expansion ({a,b}.txt) and glob expansion (*.txt).
+///
+/// The command name (`name_and_args[0]`) is skipped. Any expansion that is
+/// empty or starts with '-' is treated as a flag and freed immediately.
+/// NOTE(review): operands that follow a `--` separator and begin with '-'
+/// are therefore also dropped - confirm this is acceptable for tracing.
+fn extractFileArgs(ctx: *TraceContext, cmd: *const ast.Cmd) std.array_list.Managed([]const u8) {
+    var file_args = std.array_list.Managed([]const u8).init(ctx.allocator);
+
+    for (cmd.name_and_args[1..]) |*arg| {
+        var expanded_list = expandAtomMultiple(ctx, arg);
+        // Only the list's backing array is released here; each string is
+        // either moved into `file_args` or freed in the loop below.
+        defer expanded_list.deinit();
+
+        for (expanded_list.items) |expanded| {
+            if (expanded.len > 0 and expanded[0] != '-') {
+                // Keep this path - transfer ownership
+                bun.handleOom(file_args.append(expanded));
+            } else {
+                ctx.allocator.free(expanded);
+            }
+        }
+    }
+
+    // Expand glob patterns (e.g., *.txt -> file1.txt, file2.txt)
+    expandGlobs(ctx, &file_args);
+
+    return file_args;
+}
+
+/// Free a list of file args: every string first, then the list itself.
+fn freeFileArgs(ctx: *TraceContext, file_args: *std.array_list.Managed([]const u8)) void {
+    for (file_args.items) |path| {
+        ctx.allocator.free(path);
+    }
+    file_args.deinit();
+}
+
+/// Add redirections as operations with stream info.
+/// stdin is always recorded as a READ; stdout/stderr use the flags that
+/// `getRedirectInfo` stored for them (append vs. create+truncate).
+fn traceRedirections(ctx: *TraceContext, redir: *const RedirectInfo) void {
+    if (redir.stdin_path) |stdin| {
+        ctx.addOperationWithStream(Permission.READ, stdin, null, .stdin);
+    }
+    if (redir.stdout_path) |out| {
+        ctx.addOperationWithStream(redir.stdout_flags, out, null, .stdout);
+    }
+    if (redir.stderr_path) |err_path| {
+        ctx.addOperationWithStream(redir.stderr_flags, err_path, null, .stderr);
+    }
+}
+
+/// Record the file-system / environment operations a shell builtin would
+/// perform, without running it.
+///
+/// Redirections are only recorded for the builtins that call
+/// `traceRedirections` below (cat, ls and the stdout-only group).
+fn traceBuiltin(ctx: *TraceContext, kind: Interpreter.Builtin.Kind, cmd: *const ast.Cmd, redir: *const RedirectInfo) void {
+    // Builtins run in-process, so they don't need EXECUTE permission on a binary.
+    // We only trace the file operations they perform.
+
+    switch (kind) {
+        .cat => {
+            // cat reads files and writes to stdout (or redirect)
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            for (file_args.items) |path| {
+                ctx.addOperation(Permission.READ, path, null);
+            }
+            traceRedirections(ctx, redir);
+        },
+        .touch => {
+            // touch creates/modifies files
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            for (file_args.items) |path| {
+                ctx.addOperation(Permission.CREATE, path, null);
+            }
+        },
+        .mkdir => {
+            // mkdir creates directories
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            for (file_args.items) |path| {
+                ctx.addOperation(Permission.MKDIR, path, null);
+            }
+        },
+        .rm => {
+            // rm deletes files/directories
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            for (file_args.items) |path| {
+                ctx.addOperation(Permission.DELETE, path, null);
+            }
+        },
+        .mv => {
+            // mv moves files (read+delete source, create dest)
+            // Handles: mv src dest OR mv src1 src2 ... dest_dir/
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            if (file_args.items.len >= 2) {
+                const dest = file_args.items[file_args.items.len - 1];
+                // All but the last arg are sources
+                for (file_args.items[0 .. file_args.items.len - 1]) |src| {
+                    ctx.addOperation(Permission.READ | Permission.DELETE, src, null);
+                }
+                ctx.addOperation(Permission.CREATE, dest, null);
+            } else if (file_args.items.len == 1) {
+                // Just one arg - read it (mv will fail but we trace the access)
+                ctx.addOperation(Permission.READ | Permission.DELETE, file_args.items[0], null);
+            }
+        },
+        .cp => {
+            // cp copies files (read source, create dest)
+            // Handles: cp src dest OR cp src1 src2 ... dest_dir/
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            if (file_args.items.len >= 2) {
+                const dest = file_args.items[file_args.items.len - 1];
+                // All but the last arg are sources
+                for (file_args.items[0 .. file_args.items.len - 1]) |src| {
+                    ctx.addOperation(Permission.READ, src, null);
+                }
+                ctx.addOperation(Permission.CREATE, dest, null);
+            } else if (file_args.items.len == 1) {
+                // Just one arg - read it (cp will fail but we trace the access)
+                ctx.addOperation(Permission.READ, file_args.items[0], null);
+            }
+        },
+        .ls => {
+            // ls reads directory contents and writes to stdout (or redirect)
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            if (file_args.items.len == 0) {
+                // ls with no args reads current directory
+                ctx.addOperation(Permission.READ, ".", null);
+            } else {
+                for (file_args.items) |path| {
+                    ctx.addOperation(Permission.READ, path, null);
+                }
+            }
+            traceRedirections(ctx, redir);
+        },
+        .cd => {
+            // cd changes directory - takes first non-flag arg
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            if (file_args.items.len >= 1) {
+                ctx.addOperation(Permission.CHDIR, file_args.items[0], null);
+                // Actually update the context's cwd for subsequent commands
+                ctx.changeCwd(file_args.items[0]);
+            }
+        },
+        .@"export" => {
+            // export sets environment variables
+            // Parse arguments like FOO=bar or just FOO
+            // (the arguments go through extractFileArgs, so flag filtering and
+            // glob expansion apply to them as well)
+            var file_args = extractFileArgs(ctx, cmd);
+            defer freeFileArgs(ctx, &file_args);
+
+            for (file_args.items) |arg| {
+                // Look for = sign
+                if (std.mem.indexOfScalar(u8, arg, '=')) |eq_idx| {
+                    const name = arg[0..eq_idx];
+                    const value = arg[eq_idx + 1 ..];
+                    ctx.setTracedEnv(name, value);
+                } else {
+                    // Just exporting existing var - set to empty if not already set
+                    if (ctx.traced_env.get(arg) == null) {
+                        ctx.setTracedEnv(arg, "");
+                    }
+                }
+            }
+            // Add ENV operation after setting all vars
+            if (file_args.items.len > 0) {
+                ctx.addOperation(Permission.ENV, null, null);
+            }
+        },
+        .echo, .pwd, .which, .yes, .seq, .dirname, .basename => {
+            // These only write to stdout (or redirect) - no file reads
+            traceRedirections(ctx, redir);
+        },
+        .exit, .true, .false => {
+            // These don't access any files
+        },
+    }
+}
+
+/// Record an EXECUTE operation for a non-builtin command, followed by any
+/// redirections.
+///
+/// The executable is looked up with `which` using the traced PATH (falling
+/// back to "/usr/bin:/bin") relative to the traced cwd. When the lookup
+/// fails the operation is still recorded, with a null path.
+/// Each argument is expanded with `expandAtom`, i.e. only the first result
+/// of a brace expansion is kept and globs are not expanded.
+fn traceExternalCommand(ctx: *TraceContext, cmd_name: []const u8, cmd: *const ast.Cmd, redir: *const RedirectInfo) void {
+    // Resolve the command path using which
+    // Get PATH from environment
+    const path_env = ctx.getVar("PATH") orelse "/usr/bin:/bin";
+    var path_buf: bun.PathBuffer = undefined;
+    const resolved = which(&path_buf, path_env, ctx.cwdSlice(), cmd_name);
+
+    // Collect arguments (skip the command name itself)
+    var args_list = std.array_list.Managed([]const u8).init(ctx.allocator);
+    defer {
+        // The expanded strings are owned here; the original code frees them
+        // right after recording the operation on the stated assumption that
+        // addOperationWithArgs keeps its own copies.
+        for (args_list.items) |arg| {
+            ctx.allocator.free(arg);
+        }
+        args_list.deinit();
+    }
+
+    if (cmd.name_and_args.len > 1) {
+        for (cmd.name_and_args[1..]) |*arg| {
+            // handleOom instead of `catch {}`: dropping the argument silently
+            // would both leak the expanded string and record wrong args.
+            bun.handleOom(args_list.append(expandAtom(ctx, arg)));
+        }
+    }
+
+    const args: ?[]const []const u8 = if (args_list.items.len > 0) args_list.items else null;
+
+    // Record the command execution with args
+    if (resolved) |exe_path| {
+        ctx.addOperationWithArgs(Permission.EXECUTE, exe_path, cmd_name, args);
+    } else {
+        // Command not found, but still record the execute attempt
+        ctx.addOperationWithArgs(Permission.EXECUTE, null, cmd_name, args);
+    }
+
+    // stdin / stdout / stderr redirections, same handling as for builtins
+    traceRedirections(ctx, redir);
+}
+
+/// Build the `RedirectInfo` for a command from its `redirect_file`.
+///
+/// Only `.atom` targets produce paths; `.jsbuf` targets are ignored. The
+/// expanded path is stored in the slot(s) selected by `cmd.redirect`
+/// (stdin, stdout, stderr, or both outputs). When it is not stored anywhere
+/// (empty path, or no stream selected) it is freed here.
+/// NOTE(review): the stored paths are heap allocations; where they are
+/// released is not visible in this part of the file.
+fn getRedirectInfo(ctx: *TraceContext, cmd: *const ast.Cmd) RedirectInfo {
+    var info = RedirectInfo{};
+
+    if (cmd.redirect_file) |redirect| {
+        switch (redirect) {
+            .atom => |*atom| {
+                const path = expandAtom(ctx, atom);
+                if (path.len > 0) {
+                    if (cmd.redirect.stdin) {
+                        info.stdin_path = path;
+                    } else {
+                        const flags = if (cmd.redirect.append) Permission.APPEND else Permission.CREATE_TRUNC;
+                        // Handle stdout and stderr separately
+                        if (cmd.redirect.stdout and cmd.redirect.stderr) {
+                            // &> or similar - both go to same file
+                            info.stdout_path = path;
+                            info.stdout_flags = flags;
+                            // Also set stderr to same path (duplicate the path)
+                            info.stderr_path = bun.handleOom(ctx.allocator.dupe(u8, path));
+                            info.stderr_flags = flags;
+                        } else if (cmd.redirect.stdout) {
+                            info.stdout_path = path;
+                            info.stdout_flags = flags;
+                        } else if (cmd.redirect.stderr) {
+                            info.stderr_path = path;
+                            info.stderr_flags = flags;
+                        } else {
+                            ctx.allocator.free(path);
+                        }
+                    }
+                } else {
+                    ctx.allocator.free(path);
+                }
+            },
+            .jsbuf => {
+                // JS buffer redirections don't involve file paths
+            },
+        }
+    }
+
+    return info;
+}
+
+// =============================================================================
+// Expansion (simplified for tracing)
+// =============================================================================
+
+/// Expand an atom, potentially returning multiple strings due to brace expansion.
+/// Returns a list of expanded strings. Caller owns the memory.
+/// Brace expansion only runs when a `brace_begin` atom is present and the
+/// expanded text is non-empty; otherwise the list holds at most one string
+/// (an empty expansion yields an empty list).
+fn expandAtomMultiple(ctx: *TraceContext, atom: *const ast.Atom) std.array_list.Managed([]const u8) {
+    var result = std.array_list.Managed(u8).init(ctx.allocator);
+    var has_braces = false;
+
+    switch (atom.*) {
+        .simple => |*simple| {
+            if (simple.* == .brace_begin) has_braces = true;
+            expandSimple(ctx, simple, &result);
+        },
+        .compound => |compound| {
+            for (compound.atoms) |*simple| {
+                if (simple.* == .brace_begin) has_braces = true;
+                expandSimple(ctx, simple, &result);
+            }
+        },
+    }
+
+    // handleOom rather than `catch ""`: falling back to an empty string would
+    // leak `result` and make the argument silently vanish from the trace.
+    const expanded_str = bun.handleOom(result.toOwnedSlice());
+
+    // If there are braces, expand them
+    if (has_braces and expanded_str.len > 0) {
+        const expanded = expandBraces(ctx, expanded_str);
+        ctx.allocator.free(expanded_str);
+        return expanded;
+    }
+
+    // No braces - return single result
+    var out = std.array_list.Managed([]const u8).init(ctx.allocator);
+    if (expanded_str.len > 0) {
+        bun.handleOom(out.append(expanded_str));
+    } else {
+        ctx.allocator.free(expanded_str);
+    }
+    return out;
+}
+
+/// Expand brace patterns like {a,b,c} into multiple strings.
+/// The strings are allocated with `ctx.allocator`; the caller owns the list
+/// and its contents.
+fn expandBraces(ctx: *TraceContext, input: []const u8) std.array_list.Managed([]const u8) {
+    // Use the shared brace expansion helper
+    const unmanaged = Braces.expandBracesAlloc(input, ctx.allocator);
+    // Re-wrap the unmanaged list as a managed one bound to the same allocator
+    return .{ .items = unmanaged.items, .capacity = unmanaged.capacity, .allocator = ctx.allocator };
+}
+
+/// Walk one glob `pattern` relative to the traced cwd and append a copy of
+/// every matched path (allocated with `ctx.allocator`) to `matched_paths`.
+///
+/// Any walker error - during setup or while iterating - simply ends the
+/// collection; paths gathered before the error are kept.
+/// NOTE(review): the iterator is never explicitly deinitialized here; check
+/// whether `GlobWalker.Iterator` needs a `deinit` to release directory handles.
+fn collectGlobMatches(ctx: *TraceContext, pattern: []const u8, matched_paths: *std.array_list.Managed([]const u8)) void {
+    // Walker-internal allocations live in this arena and die with it
+    var arena = std.heap.ArenaAllocator.init(ctx.allocator);
+    defer arena.deinit();
+
+    var walker: GlobWalker = .{};
+    const init_result = walker.initWithCwd(
+        &arena,
+        pattern,
+        ctx.cwdSlice(),
+        false, // dot
+        true, // absolute (return absolute paths)
+        false, // follow_symlinks
+        false, // error_on_broken_symlinks
+        false, // only_files (include directories too)
+    ) catch return;
+    switch (init_result) {
+        .err => return,
+        .result => {},
+    }
+
+    var iter: GlobWalker.Iterator = .{ .walker = &walker };
+    const iter_init = iter.init() catch return;
+    switch (iter_init) {
+        .err => return,
+        .result => {},
+    }
+
+    while (true) {
+        const next_result = iter.next() catch return;
+        switch (next_result) {
+            .err => return,
+            .result => |maybe_path| {
+                // null means no more matches
+                const path = maybe_path orelse return;
+                // Dupe the path since it's owned by the arena
+                const duped = bun.handleOom(ctx.allocator.dupe(u8, path));
+                bun.handleOom(matched_paths.append(duped));
+            },
+        }
+    }
+}
+
+/// Expand glob patterns like *.txt into matching file paths.
+///
+/// Works in place on `patterns`: an entry with glob syntax that matches at
+/// least one path is freed and replaced, at the same position, by the
+/// matched paths. Entries without glob syntax, and globs with no matches
+/// (or whose walker fails), are left untouched. Matched paths are not
+/// re-scanned for glob syntax.
+fn expandGlobs(ctx: *TraceContext, patterns: *std.array_list.Managed([]const u8)) void {
+    var i: usize = 0;
+    while (i < patterns.items.len) {
+        const pattern = patterns.items[i];
+
+        // Check if this pattern contains glob syntax
+        if (!bun.glob.detectGlobSyntax(pattern)) {
+            i += 1;
+            continue;
+        }
+
+        // Only the list's backing array is released by this defer; the path
+        // strings are handed over to `patterns` below.
+        var matched_paths = std.array_list.Managed([]const u8).init(ctx.allocator);
+        defer matched_paths.deinit();
+        collectGlobMatches(ctx, pattern, &matched_paths);
+
+        if (matched_paths.items.len == 0) {
+            // No matches - keep original pattern
+            i += 1;
+            continue;
+        }
+
+        // Replace the pattern with its matches in a single splice instead of
+        // orderedRemove + one insert per match.
+        ctx.allocator.free(pattern);
+        bun.handleOom(patterns.replaceRange(i, 1, matched_paths.items));
+        i += matched_paths.items.len;
+    }
+}
+
+/// Expand an atom to a single string (for backward compatibility).
+/// For brace expansions, only returns the first result.
+/// The returned slice is always allocated with `ctx.allocator` (an empty
+/// expansion yields a freshly duplicated ""), so the caller must free it.
+fn expandAtom(ctx: *TraceContext, atom: *const ast.Atom) []const u8 {
+    var results = expandAtomMultiple(ctx, atom);
+    defer {
+        // Free all but the first
+        // (the first string is handed to the caller; deinit below only
+        // releases the list's backing array)
+        if (results.items.len > 1) {
+            for (results.items[1..]) |s| {
+                ctx.allocator.free(s);
+            }
+        }
+        results.deinit();
+    }
+
+    if (results.items.len > 0) {
+        return results.items[0];
+    }
+    return bun.handleOom(ctx.allocator.dupe(u8, ""));
+}
+
+/// Append the textual expansion of one simple atom to `out`.
+///
+/// Variables are resolved through `ctx.getVar` and contribute nothing when
+/// unset. Runtime-dependent atoms (`VarArgv`, `cmd_subst`) append nothing and
+/// set `ctx.current_dynamic` instead. Glob and brace tokens are emitted
+/// literally so that later stages can expand them.
+fn expandSimple(ctx: *TraceContext, simple: *const ast.SimpleAtom, out: *std.array_list.Managed(u8)) void {
+    switch (simple.*) {
+        .Text => |text| {
+            bun.handleOom(out.appendSlice(text));
+        },
+        .Var => |varname| {
+            if (ctx.getVar(varname)) |val| {
+                bun.handleOom(out.appendSlice(val));
+            }
+        },
+        .VarArgv => {
+            // Special variables like $1, $@, etc. depend on runtime args
+            ctx.current_dynamic = true;
+        },
+        .cmd_subst => {
+            // Command substitutions can't be statically analyzed
+            // Mark as dynamic and skip the actual substitution
+            ctx.current_dynamic = true;
+        },
+        .asterisk => {
+            // Glob pattern - output as literal for tracing
+            bun.handleOom(out.appendSlice("*"));
+        },
+        .double_asterisk => {
+            // Glob pattern - output as literal for tracing
+            bun.handleOom(out.appendSlice("**"));
+        },
+        .brace_begin => {
+            bun.handleOom(out.appendSlice("{"));
+        },
+        .brace_end => {
+            bun.handleOom(out.appendSlice("}"));
+        },
+        .comma => {
+            bun.handleOom(out.appendSlice(","));
+        },
+        .tilde => {
+            // Expand tilde to home directory
+            // (HOME as seen by ctx.getVar; a literal "~" is kept when unset)
+            if (ctx.getVar("HOME")) |home| {
+                bun.handleOom(out.appendSlice(home));
+            } else {
+                bun.handleOom(out.appendSlice("~"));
+            }
+        },
+    }
+}
+
+// =============================================================================
+// Public API
+// =============================================================================
+
+/// Trace a shell script and return the trace result
+///
+/// `cwd` overrides the starting directory; when null the process cwd is
+/// used, falling back to "/" if it cannot be read. The temporary
+/// `TraceContext` is torn down before returning; the returned `TraceResult`
+/// is handed to the caller (`traceShellScript` calls `deinit` on it).
+pub fn trace(
+    allocator: Allocator,
+    shargs: *ShellArgs,
+    jsobjs: []JSValue,
+    export_env: ?*EnvMap,
+    cwd: ?[]const u8,
+    globalThis: *JSGlobalObject,
+) TraceResult {
+    // Get current working directory
+    var cwd_buf: bun.PathBuffer = undefined;
+    const current_cwd = cwd orelse brk: {
+        const result = bun.sys.getcwdZ(&cwd_buf);
+        switch (result) {
+            .result => |c| break :brk c[0..c.len],
+            .err => break :brk "/",
+        }
+    };
+
+    var result = TraceResult.init(allocator, current_cwd);
+    var ctx = TraceContext.init(allocator, &result, current_cwd, export_env, jsobjs, globalThis);
+    defer ctx.deinit();
+
+    traceScript(&ctx, &shargs.script_ast);
+
+    return result;
+}
+
+/// JavaScript-callable function to trace a shell script
+///
+/// Expects a single ParsedShellScript argument and throws a JS error when it
+/// is undefined, not a ParsedShellScript, or has no parsed args. The native
+/// result is converted with `toJS` and released afterwards.
+pub fn traceShellScript(globalThis: *JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!JSValue {
+    const allocator = bun.default_allocator;
+    const parsed_shell_script_js = callframe.argumentsAsArray(1)[0];
+    if (parsed_shell_script_js.isUndefined()) {
+        return globalThis.throw("trace: expected a ParsedShellScript", .{});
+    }
+
+    const parsed_shell_script = jsc.Codegen.JSParsedShellScript.fromJS(parsed_shell_script_js) orelse {
+        return globalThis.throw("trace: expected a ParsedShellScript", .{});
+    };
+
+    if (parsed_shell_script.args == null) {
+        return globalThis.throw("trace: shell args is null", .{});
+    }
+
+    const shargs = parsed_shell_script.args.?;
+    const jsobjs = parsed_shell_script.jsobjs.items;
+
+    // Get cwd from parsed script if set
+    // (the UTF-8 copy must outlive the trace() call, hence the deferred deinit)
+    var cwd_utf8: ?bun.ZigString.Slice = null;
+    defer if (cwd_utf8) |*utf8| utf8.deinit();
+
+    const cwd_slice: ?[]const u8 = if (parsed_shell_script.cwd) |c| blk: {
+        cwd_utf8 = c.toUTF8(bun.default_allocator);
+        break :blk cwd_utf8.?.slice();
+    } else null;
+
+    var result = trace(
+        allocator,
+        shargs,
+        jsobjs,
+        if (parsed_shell_script.export_env != null) &parsed_shell_script.export_env.? else null,
+        cwd_slice,
+        globalThis,
+    );
+    defer result.deinit();
+
+    return result.toJS(globalThis);
+}
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const bun = @import("bun");
+const ResolvePath = bun.path;
+const which = bun.which;
+const GlobWalker = bun.glob.BunGlobWalker;
+
+const jsc = bun.jsc;
+const JSGlobalObject = jsc.JSGlobalObject;
+const JSValue = jsc.JSValue;
+
+const shell = bun.shell;
+const EnvMap = shell.EnvMap;
+const EnvStr = shell.EnvStr;
+const Interpreter = shell.Interpreter;
+const ast = shell.AST;
+
+const Braces = shell.interpret.Braces;
+const ShellArgs = shell.interpret.ShellArgs;
diff --git a/src/shell/braces.zig b/src/shell/braces.zig
index 3f9ef5cf6c..832adab71a 100644
--- a/src/shell/braces.zig
+++ b/src/shell/braces.zig
@@ -723,6 +723,68 @@ test Lexer {
     }
 }
 
+/// High-level helper that expands brace patterns in a string.
+/// Returns a list of expanded strings. Caller owns the returned memory.
+/// On error or if no expansion is needed, returns the input as a single-element list.
+/// Empty expansion results are omitted from the returned list. Under
+/// allocation failure the list may be shorter than expected (or empty), but
+/// nothing is leaked.
+pub fn expandBracesAlloc(input: []const u8, allocator: Allocator) std.ArrayListUnmanaged([]const u8) {
+    // Use arena for temporary tokenization
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const arena_alloc = arena.allocator();
+
+    // Tokenize - use appropriate lexer based on content
+    const lexer_output = if (bun.strings.isAllASCII(input))
+        Lexer.tokenize(arena_alloc, input) catch return expandBracesFallback(input, allocator)
+    else
+        NewLexer(.wtf8).tokenize(arena_alloc, input) catch return expandBracesFallback(input, allocator);
+
+    const expansion_count = calculateExpandedAmount(lexer_output.tokens.items[0..]);
+    if (expansion_count == 0) {
+        return expandBracesFallback(input, allocator);
+    }
+
+    // The array of builders lives in the arena; each builder's bytes are
+    // allocated with the caller's allocator so they can be handed out.
+    const expanded_strings = arena_alloc.alloc(std.array_list.Managed(u8), expansion_count) catch
+        return expandBracesFallback(input, allocator);
+
+    for (expanded_strings) |*s| {
+        s.* = std.array_list.Managed(u8).init(allocator);
+    }
+
+    // Perform brace expansion
+    expand(
+        arena_alloc,
+        lexer_output.tokens.items[0..],
+        expanded_strings,
+        lexer_output.contains_nested,
+    ) catch {
+        for (expanded_strings) |*s| s.deinit();
+        return expandBracesFallback(input, allocator);
+    };
+
+    // Reserve the output up front so that moving a string into it cannot
+    // fail (and therefore cannot leak the string).
+    var out: std.ArrayListUnmanaged([]const u8) = .{};
+    out.ensureTotalCapacity(allocator, expansion_count) catch {
+        for (expanded_strings) |*s| s.deinit();
+        return expandBracesFallback(input, allocator);
+    };
+
+    // Collect results
+    for (expanded_strings) |*s| {
+        if (s.items.len == 0) {
+            // Empty expansions are not reported; release any spare capacity
+            s.deinit();
+            continue;
+        }
+        const slice = s.toOwnedSlice() catch {
+            // Could not detach the buffer: drop this entry instead of leaking it
+            s.deinit();
+            continue;
+        };
+        out.appendAssumeCapacity(slice);
+    }
+
+    return out;
+}
+
+/// Fallback result for `expandBracesAlloc`: a list holding one copy of
+/// `input`, or an empty list if that copy cannot be allocated.
+fn expandBracesFallback(input: []const u8, allocator: Allocator) std.ArrayListUnmanaged([]const u8) {
+    var out: std.ArrayListUnmanaged([]const u8) = .{};
+    const copy = allocator.dupe(u8, input) catch return out;
+    // Free the copy if it cannot be stored, so a failed append does not leak
+    out.append(allocator, copy) catch allocator.free(copy);
+    return out;
+}
+
 const SmolStr = @import("../string.zig").SmolStr;
 
 const Encoding = @import("./shell.zig").StringEncoding;
diff --git a/src/shell/shell.zig b/src/shell/shell.zig
index 92bf54c9cb..83e7209485 100644
--- a/src/shell/shell.zig
+++ 
b/src/shell/shell.zig
@@ -2,6 +2,7 @@ pub const interpret = @import("./interpreter.zig");
 pub const subproc = @import("./subproc.zig");
 
 pub const AllocScope = @import("./AllocScope.zig");
+pub const TraceInterpreter = @import("./TraceInterpreter.zig");
 
 pub const EnvMap = interpret.EnvMap;
 pub const EnvStr = interpret.EnvStr;
diff --git a/test/js/bun/shell/trace.test.ts b/test/js/bun/shell/trace.test.ts
new file mode 100644
index 0000000000..41808088be
--- /dev/null
+++ b/test/js/bun/shell/trace.test.ts
@@ -0,0 +1,417 @@
+import { $ } from "bun";
+import { describe, expect, test } from "bun:test";
+import { tempDir } from "harness";
+
+// Normalize path separators (backslash -> forward slash) so path assertions pass on Windows too
+const normalizePath = (p: string) => p.replaceAll("\\", "/");
+
+// Permission flags (octal) - mirrors the Zig constants; each traced operation's `flags` is compared against these
+const Permission = {
+  O_RDONLY: 0o0,
+  O_WRONLY: 0o1,
+  O_RDWR: 0o2,
+  O_CREAT: 0o100,
+  O_EXCL: 0o200,
+  O_TRUNC: 0o1000,
+  O_APPEND: 0o2000,
+  X_OK: 0o100000,
+  DELETE: 0o200000,
+  MKDIR: 0o400000,
+  CHDIR: 0o1000000,
+  ENV: 0o2000000,
+} as const;
+
+// Convenience combinations used by the assertions below
+const READ = Permission.O_RDONLY; // 0, so `READ | X === X` and `flags === READ` matches only flag-less ops
+const WRITE = Permission.O_WRONLY; // not referenced by any test in this file
+const CREATE = Permission.O_CREAT | Permission.O_WRONLY;
+const CREATE_TRUNC = Permission.O_CREAT | Permission.O_TRUNC | Permission.O_WRONLY;
+const APPEND = Permission.O_APPEND | Permission.O_WRONLY;
+const EXECUTE = Permission.X_OK;
+
+describe("Bun.$.trace", () => {
+  test("returns trace result object", () => {
+    const result = $.trace`echo hello`;
+    expect(result).toHaveProperty("operations");
+    expect(result).toHaveProperty("cwd");
+    expect(result).toHaveProperty("success");
+    expect(result).toHaveProperty("error");
+    expect(result.success).toBe(true);
+    expect(result.error).toBeNull();
+    expect(Array.isArray(result.operations)).toBe(true);
+  });
+
+  test("traces echo command (builtin, no file access)", () => {
+    const result = $.trace`echo hello world`;
+    expect(result.success).toBe(true);
+
+    // echo is a builtin that runs in-process - no file access, no operations
+    // It just writes to stdout (terminal) which doesn't require any permissions
+    expect(result.operations.length).toBe(0);
+  });
+
+  test("traces cat command with file read", () => {
+    const result = $.trace`cat /tmp/test.txt`;
+    expect(result.success).toBe(true);
+
+    // cat is a builtin - it reads files but runs in-process (no EXECUTE)
+    const readOps = result.operations.filter(op => op.flags === READ && op.path?.endsWith("test.txt"));
+    expect(readOps.length).toBe(1);
+    expect(normalizePath(readOps[0].path!)).toBe("/tmp/test.txt");
+  });
+
+  test("traces rm command with delete permission", () => {
+    const result = $.trace`rm /tmp/to-delete.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have delete for the file
+    const deleteOps = result.operations.filter(op => op.flags === Permission.DELETE);
+    expect(deleteOps.length).toBe(1);
+    expect(normalizePath(deleteOps[0].path!)).toBe("/tmp/to-delete.txt");
+  });
+
+  test("traces mkdir command", () => {
+    const result = $.trace`mkdir /tmp/newdir`;
+    expect(result.success).toBe(true);
+
+    // Should have mkdir permission
+    const mkdirOps = result.operations.filter(op => op.flags === Permission.MKDIR);
+    expect(mkdirOps.length).toBe(1);
+    expect(normalizePath(mkdirOps[0].path!)).toBe("/tmp/newdir");
+  });
+
+  test("traces touch command with create permission", () => {
+    const result = $.trace`touch /tmp/newfile.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have create permission
+    const createOps = result.operations.filter(op => op.flags === CREATE);
+    expect(createOps.length).toBe(1);
+    expect(normalizePath(createOps[0].path!)).toBe("/tmp/newfile.txt");
+  });
+
+  test("traces cp command with read and write", () => {
+    const result = $.trace`cp /tmp/src.txt /tmp/dst.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have read for source
+    const readOps = result.operations.filter(op => op.flags === READ && op.path?.endsWith("src.txt"));
+    expect(readOps.length).toBe(1);
+
+    // Should have create for destination
+    const writeOps = result.operations.filter(op => op.flags === CREATE && op.path?.endsWith("dst.txt"));
+    expect(writeOps.length).toBe(1);
+  });
+
+  test("traces mv command with read, delete, and write", () => {
+    const result = $.trace`mv /tmp/old.txt /tmp/new.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have read+delete for source (combined in one operation)
+    const srcOps = result.operations.filter(
+      op => op.flags === (READ | Permission.DELETE) && op.path?.endsWith("old.txt"),
+    );
+    expect(srcOps.length).toBe(1);
+
+    // Should have create for destination
+    const dstOps = result.operations.filter(op => op.flags === CREATE && op.path?.endsWith("new.txt"));
+    expect(dstOps.length).toBe(1);
+  });
+
+  test("traces cd command with chdir permission", () => {
+    const result = $.trace`cd /tmp`;
+    expect(result.success).toBe(true);
+
+    const chdirOps = result.operations.filter(op => op.flags === Permission.CHDIR);
+    expect(chdirOps.length).toBe(1);
+    expect(normalizePath(chdirOps[0].path!)).toBe("/tmp");
+  });
+
+  test("traces environment variable assignments with accumulated env", () => {
+    const result = $.trace`FOO=1 BAR=2 echo test`;
+    expect(result.success).toBe(true);
+
+    const envOps = result.operations.filter(op => op.flags === Permission.ENV);
+    expect(envOps.length).toBe(2);
+    // First op has FOO
+    expect(envOps[0].env).toEqual({ FOO: "1" });
+    // Second op has both FOO and BAR
+    expect(envOps[1].env?.FOO).toBe("1");
+    expect(envOps[1].env?.BAR).toBe("2");
+  });
+
+  test("traces export with env values", () => {
+    const result = $.trace`export FOO=hello BAR=world`;
+    expect(result.success).toBe(true);
+
+    const envOps = result.operations.filter(op => op.flags === Permission.ENV);
+    expect(envOps.length).toBe(1);
+    expect(envOps[0].env?.FOO).toBe("hello");
+    expect(envOps[0].env?.BAR).toBe("world");
+  });
+
+  test("traces output redirection combined with command", () => {
+    const result = $.trace`echo hello > /tmp/output.txt`;
+    expect(result.success).toBe(true);
+
+    // echo is a builtin - redirect creates the output file (CREATE_TRUNC, no EXECUTE)
+    const redirectOps = result.operations.filter(op => op.flags === CREATE_TRUNC && op.path?.endsWith("output.txt"));
+    expect(redirectOps.length).toBe(1);
+  });
+
+  test("traces append redirection combined with command", () => {
+    const result = $.trace`echo hello >> /tmp/append.txt`;
+    expect(result.success).toBe(true);
+
+    // echo is a builtin - append redirect opens file for appending (no EXECUTE)
+    const appendOps = result.operations.filter(op => op.flags === APPEND && op.path?.endsWith("append.txt"));
+    expect(appendOps.length).toBe(1);
+  });
+
+  test("traces input redirection with read and stdin stream", () => {
+    const result = $.trace`cat < /tmp/input.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have read for input file with stdin stream marker
+    const stdinOps = result.operations.filter(
+      op => op.flags === READ && op.path?.endsWith("input.txt") && op.stream === "stdin",
+    );
+    expect(stdinOps.length).toBe(1);
+  });
+
+  test("traces stderr redirection with stream marker", () => {
+    const result = $.trace`cat /nonexistent 2> /tmp/err.txt`;
+    expect(result.success).toBe(true);
+
+    // Should have stderr stream for error redirect
+    const stderrOps = result.operations.filter(op => op.stream === "stderr" && op.path?.endsWith("err.txt"));
+    expect(stderrOps.length).toBe(1);
+    expect(stderrOps[0].flags).toBe(CREATE_TRUNC);
+  });
+
+  test("stdout redirect has stream marker", () => {
+    const result = $.trace`echo hello > /tmp/out.txt`;
+    expect(result.success).toBe(true);
+
+    const stdoutOps = result.operations.filter(op => op.stream === "stdout");
+    expect(stdoutOps.length).toBe(1);
+    expect(normalizePath(stdoutOps[0].path!)).toBe("/tmp/out.txt");
+  });
+
+  test("traces export command with env permission", () => {
+    const result = $.trace`export FOO=bar`;
+    expect(result.success).toBe(true);
+
+    const envOps = result.operations.filter(op => op.flags === Permission.ENV);
+    expect(envOps.length).toBeGreaterThan(0);
+  });
+
+  test("traces variable assignment with env permission", () => {
+    const result = $.trace`FOO=bar echo $FOO`;
+    expect(result.success).toBe(true);
+
+    const envOps = result.operations.filter(op => op.flags === Permission.ENV);
+    expect(envOps.length).toBeGreaterThan(0);
+  });
+
+  test("traces pipeline", () => {
+    const result = $.trace`cat /tmp/file.txt | grep pattern`;
+    expect(result.success).toBe(true);
+
+    // cat is a builtin - reads file (no EXECUTE, no command field)
+    const readOps = result.operations.filter(op => op.flags === READ && op.path?.endsWith("file.txt"));
+    expect(readOps.length).toBe(1);
+
+    // grep is external, should have execute permission and command field
+    const grepOps = result.operations.filter(op => op.command === "grep" && (op.flags & EXECUTE) !== 0);
+    expect(grepOps.length).toBe(1);
+  });
+
+  test("traces ls with directory read", () => {
+    const result = $.trace`ls /tmp`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ && normalizePath(op.path || "") === "/tmp");
+    expect(readOps.length).toBe(1);
+  });
+
+  test("traces ls without args (current dir)", () => {
+    const result = $.trace`ls`;
+    expect(result.success).toBe(true);
+
+    // Should read current directory (.)
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(1);
+  });
+
+  test("includes cwd in result", () => {
+    const result = $.trace`echo test`;
+    expect(result.cwd).toBeTruthy();
+    expect(typeof result.cwd).toBe("string");
+  });
+
+  test("includes cwd in each operation", () => {
+    const result = $.trace`cat /tmp/test.txt`;
+    for (const op of result.operations) {
+      expect(op.cwd).toBeTruthy();
+      expect(typeof op.cwd).toBe("string");
+    }
+  });
+
+  test("handles template literal interpolation", () => {
+    const filename = "test.txt";
+    const result = $.trace`cat /tmp/${filename}`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ && op.path?.endsWith("test.txt"));
+    expect(readOps.length).toBe(1);
+  });
+
+  test("does not actually execute commands", () => {
+    // This would fail if it actually ran, since the file doesn't exist
+    const result = $.trace`cat /nonexistent/path/that/does/not/exist.txt`;
+    expect(result.success).toBe(true);
+    expect(result.operations.length).toBeGreaterThan(0);
+  });
+
+  test("external command resolves path when available", () => {
+    // Use a cross-platform external command
+    const cmd = process.platform === "win32" ? "cmd" : "/bin/ls";
+    const result = $.trace`${cmd} --version`;
+    expect(result.success).toBe(true);
+
+    const execOps = result.operations.filter(op => op.flags === EXECUTE);
+    expect(execOps.length).toBeGreaterThan(0);
+    // Command name should be captured
+    expect(execOps[0].command).toBe(cmd);
+  });
+
+  test("external commands include args array", () => {
+    const result = $.trace`grep -r 'pattern' src/`;
+    expect(result.success).toBe(true);
+
+    const execOps = result.operations.filter(op => op.flags === EXECUTE);
+    expect(execOps.length).toBe(1);
+    expect(execOps[0].command).toBe("grep");
+    expect(execOps[0].args).toEqual(["-r", "pattern", "src/"]);
+  });
+
+  test("pipeline commands each have their own args", () => {
+    const result = $.trace`git diff HEAD^ -- src/ | head -100`;
+    expect(result.success).toBe(true);
+
+    const execOps = result.operations.filter(op => op.flags === EXECUTE);
+    expect(execOps.length).toBe(2);
+
+    expect(execOps[0].command).toBe("git");
+    expect(execOps[0].args).toEqual(["diff", "HEAD^", "--", "src/"]);
+
+    expect(execOps[1].command).toBe("head");
+    expect(execOps[1].args).toEqual(["-100"]);
+  });
+
+  test("builtins do not have args (tracked as file operations)", () => {
+    const result = $.trace`cat file1.txt file2.txt`;
+    expect(result.success).toBe(true);
+
+    // Builtins track files, not args
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(2);
+    expect(readOps[0].args).toBeUndefined();
+    expect(readOps[1].args).toBeUndefined();
+  });
+
+  test("traces && (and) operator", () => {
+    const result = $.trace`cat /tmp/a.txt && cat /tmp/b.txt`;
+    expect(result.success).toBe(true);
+
+    // Both commands should be traced
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(2);
+    expect(normalizePath(readOps[0].path!)).toBe("/tmp/a.txt");
+    expect(normalizePath(readOps[1].path!)).toBe("/tmp/b.txt");
+  });
+
+  test("traces || (or) operator", () => {
+    const result = $.trace`cat /tmp/a.txt || cat /tmp/b.txt`;
+    expect(result.success).toBe(true);
+
+    // Both commands should be traced
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(2);
+  });
+
+  test("traces subshell with cwd isolation", () => {
+    const result = $.trace`(cd /tmp && ls) && ls`;
+    expect(result.success).toBe(true);
+
+    // Should have: CHDIR /tmp, READ /tmp (inside subshell), READ . (outside subshell)
+    const chdirOps = result.operations.filter(op => op.flags === Permission.CHDIR);
+    expect(chdirOps.length).toBe(1);
+    expect(normalizePath(chdirOps[0].path!)).toBe("/tmp");
+
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(2);
+    // First ls inside subshell should see /tmp
+    expect(normalizePath(readOps[0].cwd!)).toBe("/tmp");
+    // Second ls outside subshell should see original cwd (subshell cwd is restored)
+    expect(normalizePath(readOps[1].cwd!)).not.toBe("/tmp");
+  });
+
+  test("cd updates cwd for subsequent commands", () => {
+    const result = $.trace`cd /tmp && ls`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(1);
+    expect(normalizePath(readOps[0].cwd!)).toBe("/tmp");
+    expect(normalizePath(readOps[0].path!)).toBe("/tmp"); // ls reads cwd
+  });
+
+  test("expands brace patterns", () => {
+    const result = $.trace`cat /tmp/{a,b,c}.txt`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(3);
+    expect(normalizePath(readOps[0].path!)).toBe("/tmp/a.txt");
+    expect(normalizePath(readOps[1].path!)).toBe("/tmp/b.txt");
+    expect(normalizePath(readOps[2].path!)).toBe("/tmp/c.txt");
+  });
+
+  test("expands tilde to home directory", () => {
+    const result = $.trace`cat ~/.config/test.txt`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(1);
+    expect(readOps[0].path).not.toContain("~");
+    // Home directory path varies by platform
+    if (process.platform === "win32") {
+      // Windows uses USERPROFILE which expands to something like C:\Users\username
+      expect(readOps[0].path).toMatch(/\.config[/\\]test\.txt$/);
+    } else {
+      expect(readOps[0].path).toContain(".config/test.txt");
+    }
+  });
+
+  test("expands glob patterns to matching files", () => {
+    // Create real files with the tempDir helper so the glob has something to match
+    const { join } = require("path"); // NOTE(review): a top-level import would match the file's other imports
+    using dir = tempDir("trace-glob-test", {
+      "a.txt": "",
+      "b.txt": "",
+      "c.txt": "",
+    });
+    const testDir = String(dir);
+
+    const result = $.trace`cat ${testDir}/*.txt`;
+    expect(result.success).toBe(true);
+
+    const readOps = result.operations.filter(op => op.flags === READ);
+    expect(readOps.length).toBe(3);
+    const paths = readOps.map(op => normalizePath(op.path!)).sort();
+    const expected = [join(testDir, "a.txt"), join(testDir, "b.txt"), join(testDir, "c.txt")].map(normalizePath);
+    expect(paths).toEqual(expected);
+  });
+});