Compare commits

...

5 Commits

Author SHA1 Message Date
Zack Radisic
41998360bd use .asArrayBuffer() 2025-04-03 13:19:37 -07:00
Zack Radisic
2e9c9cba02 more test 2025-04-03 12:43:45 -07:00
Zack Radisic
69f1331f7b wip 2025-04-02 17:22:34 -07:00
Zack Radisic
d068b1c0b7 okay 2025-04-01 16:55:50 -07:00
Zack Radisic
c54bebc79d wip 2025-03-31 18:02:38 -07:00
13 changed files with 409 additions and 46 deletions

15
.vscode/launch.json generated vendored
View File

@@ -146,6 +146,21 @@
"action": "openExternally",
},
},
{
"type": "lldb",
"request": "launch",
"name": "ZACK",
"program": "${workspaceFolder}/build/debug/bun-debug",
"args": ["./index.ts"],
"cwd": "/Users/zackradisic/Code/dd-trace-bun",
"env": {
"FORCE_COLOR": "0",
"BUN_DEBUG_QUIET_LOGS": "1",
"BUN_GARBAGE_COLLECTOR_LEVEL": "2",
},
"console": "internalConsole",
// Don't pause when the GC runs while the debugger is open.
},
// bun run [file]
{
"type": "lldb",

21
index.ts Normal file
View File

@@ -0,0 +1,21 @@
import { $ } from "bun";
import util from "node:util";
// Scratch script for manually exercising how a failed `$` shell command is
// reported. The only statements that run are the `util.inspect` log below and
// the final `await`, which uses `.throws(true)` with no surrounding try/catch.
// NOTE(review): this looks like a local debugging aid (it lives at the repo
// root and is referenced by a personal launch configuration) — confirm it is
// meant to be committed.
let foo = "hi";
console.log(util.inspect(foo));
// await $`ls fljsdklfjslkdfj`.quiet();
// Options:
// 1. make it so that VirtualMachine.printErrorInstance (javascript.zig) knows how to check if an error instance is a ShellError (right now it skips custom inspect)
// 2. make the shell error not actually an Error but just an object
// try {
// await $`ls fljsdklfjslkdfj`.throws(true).quiet();
// } catch (e) {
// // e[Bun.inspect.custom] = () => "LOL";
// console.log(e);
// console.log(Object.getOwnPropertyNames(e.stdout));
// }
// Presumably the path does not exist, so this is expected to reject — TODO confirm.
await $`ls fljsdklfjslkdfj`.throws(true).quiet();

View File

@@ -22,6 +22,10 @@ export default [
fn: "setQuiet",
length: 0,
},
getQuiet: {
fn: "getQuiet",
length: 0,
},
},
}),
];

View File

@@ -148,6 +148,7 @@ export default [
},
aborted: {
getter: "getAborted",
passThis: true,
},
flags: {
getter: "getFlags",

View File

@@ -337,7 +337,14 @@ JSValue constructBunFetchObject(VM& vm, JSObject* bunObject)
return fetchFn;
}
static JSValue constructBunShell(VM& vm, JSObject* bunObject)
/// Reports whether `jsObject` has a property keyed by the `napiDlopenHandle`
/// private builtin name. The shell's JS `ShellError.initialize` sets
/// `this.$napiDlopenHandle = true` as a dummy marker so that native code can
/// recognise shell errors through this lookup.
///
/// WARNING: you must check that the JSObject* comes from an error instance
/// before calling this; the marker name is shared with other (N-API) code, so
/// the lookup alone does not prove the object is a ShellError.
extern "C" bool ShellError__isShellError(JSGlobalObject* globalObject, JSC::JSObject* jsObject)
{
    auto& vm = globalObject->vm();
    const auto& markerName = WebCore::builtinNames(vm).napiDlopenHandlePrivateName();
    return jsObject->hasProperty(globalObject, markerName);
}
static JSValue
constructBunShell(VM& vm, JSObject* bunObject)
{
auto* globalObject = jsCast<Zig::GlobalObject*>(bunObject->globalObject());
JSFunction* createParsedShellScript = JSFunction::create(vm, bunObject->globalObject(), 2, "createParsedShellScript"_s, BunObject_callback_createParsedShellScript, ImplementationVisibility::Private, NoIntrinsic);
@@ -896,7 +903,6 @@ static void exportBunObject(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC:
exportValues.append(value);
}
}
}
namespace Zig {

View File

@@ -1865,6 +1865,29 @@ bool inline parseArrayIndex(JSC::ThrowScope& scope, JSC::JSGlobalObject* globalO
return true;
}
/// Exposes the backing bytes of a `JSUint8Array` (e.g. a Buffer) to native callers.
///
/// @param globalObject  global used for throwing.
/// @param value         encoded JS value expected to be a `JSUint8Array`.
/// @param lenOut        receives the view's byte length; 0 when the call fails.
/// @param failed        receives `true` when a TypeError was thrown, `false` otherwise.
/// @return pointer to the first byte of the view, or `nullptr` on failure.
///         Callers must check `*failed` before using the pointer.
///
/// A TypeError is thrown when `value` is undefined/null or is not a `JSUint8Array`.
extern "C" const unsigned char* jsBufferGetBytes(JSC::JSGlobalObject* globalObject, JSC::EncodedJSValue value, uint32_t* lenOut, bool* failed)
{
    // Give both out-parameters a defined value on every path so callers never
    // read an uninitialized flag or length.
    *lenOut = 0;
    *failed = false;

    auto& vm = JSC::getVM(globalObject);
    auto throwScope = DECLARE_THROW_SCOPE(vm);

    auto jsvalue = JSValue::decode(value);
    if (jsvalue.isUndefinedOrNull()) {
        throwTypeError(globalObject, throwScope, "Cannot convert undefined or null to object"_s);
        *failed = true;
        return nullptr;
    }

    auto thisObject = JSC::jsDynamicCast<JSC::JSUint8Array*>(jsvalue);
    if (UNLIKELY(!thisObject)) {
        throwTypeError(globalObject, throwScope, "Not a JSBuffer"_s);
        *failed = true;
        return nullptr;
    }

    // NOTE(review): byteLength() is narrowed to 32 bits here, as before; views
    // larger than UINT32_MAX bytes would report a truncated length.
    *lenOut = static_cast<uint32_t>(thisObject->byteLength());
    return thisObject->span().data();
}
// https://github.com/nodejs/node/blob/v22.9.0/lib/buffer.js#L834
// using byteLength and byte offsets here is intentional
static JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSArrayBufferView>::ClassParameter castedThis)

View File

@@ -4027,6 +4027,10 @@ pub const VirtualMachine = struct {
}
if (is_error_instance) {
// SAFETY: error instances are always objects
const error_obj = error_instance.getObject().?;
const is_shell_error = if (mode == .js) bun.shell.isShellError(this.global, error_obj) else false;
var saw_cause = false;
const Iterator = JSC.JSPropertyIterator(.{
.include_value = true,
@@ -4035,8 +4039,6 @@ pub const VirtualMachine = struct {
.observable = false,
.only_non_index_properties = true,
});
// SAFETY: error instances are always objects
const error_obj = error_instance.getObject().?;
var iterator = try Iterator.init(this.global, error_obj);
defer iterator.deinit();
const longest_name = @min(iterator.getLongestPropertyName(), 10);
@@ -4077,7 +4079,8 @@ pub const VirtualMachine = struct {
}
formatter.max_depth = 1;
formatter.quote_strings = true;
formatter.disable_inspect_custom = true;
const disable_inspect_custom = !is_shell_error;
formatter.disable_inspect_custom = disable_inspect_custom;
const pad_left = longest_name -| field.length();
is_first_property = false;
@@ -4096,7 +4099,7 @@ pub const VirtualMachine = struct {
JSC.Formatter.Tag.getAdvanced(
value,
this.global,
.{ .disable_inspect_custom = true, .hide_global = true },
.{ .disable_inspect_custom = disable_inspect_custom, .hide_global = true },
),
Writer,
writer,

View File

@@ -217,6 +217,103 @@ pub const Encoder = struct {
return bun_string.transferToJS(global);
}
/// Used internally for the shell, assumes utf-8 strings
///
/// If the stdout/stderr Buffer is really long, we don't want to show the
/// entire string to the user, so callers only display a bounded prefix of it
/// (e.g. the first 256 bytes)
///
/// But we don't want to call `stderr.toString().substring(0, 256)` because
/// that will decode the entire buffer (and it could be huge)
///
/// So this function takes as `input` an already-truncated byte array (e.g. bufferBytes[0 .. 256])
///
/// By truncating the byte array, we may accidentally cut off some bytes in a multi-byte char
/// encoding. The end of `input` is therefore scanned backwards and an incomplete trailing
/// sequence is dropped so we don't print out invalid UTF-8. Only bytes at the end of `input`
/// are inspected by this function.
///
/// Returns:
/// - `bun.String.empty` when `input` is empty
/// - the result of `toBunStringMaxCharsBestEffortForShellImpl` (which also strips trailing
///   "\n" / "\r") when the kept slice ends in an ASCII byte or a complete multi-byte sequence
/// - the result of `toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8` (no trimming) when
///   the tail still does not look like valid UTF-8; depending on the branch this receives
///   either the whole `input` or a shortened prefix
pub fn toBunStringMaxCharsBestEffortForShell(input: []const u8) bun.String {
if (input.len == 0) return bun.String.empty;
// NOTE(review): always true after the early return above.
bun.assert(input.len > 0); // don't call this function with an empty string
// Signed so that stepping below index 0 ends the outer loop instead of wrapping.
var last_index: i64 = @intCast(input.len - 1);
// At most one incomplete trailing sequence is dropped; after that, a second
// mismatch is treated as invalid UTF-8.
var allow_rollback = true;
outer_loop: while (last_index >= 0) {
// `last_index` is non-negative here because of the loop condition.
switch (input[@bitCast(last_index)]) {
// ending is ascii and so it's good
0x0...0x7F => return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]),
// we hit a continuation byte, there are two cases:
// 1. Valid string -> we hit the last continuation byte in a multi-byte sequence
// 2. Invalid string -> we accidentally cut off some continuation bytes
0x80...0xBF => {
// Walk backwards over continuation bytes until the lead byte is found,
// counting how many continuation bytes follow it.
var continuation_bytes_count: u32 = 1;
var j = last_index - 1;
while (j >= 0) {
// No lead byte handled below is followed by more than 3 continuation bytes.
if (continuation_bytes_count > 3) return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
switch (input[@bitCast(j)]) {
0x80...0xBF => {
continuation_bytes_count += 1;
j -= 1;
},
// Lead byte of a 2 byte encoding: complete when exactly 1 continuation byte follows.
0xC0...0xDF => {
if (continuation_bytes_count == 1) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
// Count mismatch: the first time, drop the lead byte and everything after it and
// re-examine from the byte before it; the second time, give up and return the
// bytes before `j` through the invalid-UTF-8 fallback.
// NOTE(review): when j == 0 the rollback sets last_index to -1, the outer loop
// exits and the whole `input` is returned by the fallback at the bottom — confirm
// this is intended.
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
// Lead byte of a 3 byte encoding: complete when exactly 2 continuation bytes follow.
0xE0...0xEF => {
if (continuation_bytes_count == 2) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
// Lead byte of a 4 byte encoding: complete when exactly 3 continuation bytes follow.
0xF0...0xF7 => {
if (continuation_bytes_count == 3) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
// invalid utf-8
else => {
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
},
}
}
// didn't find the starting byte and looked through the whole string,
// means invalid
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
},
// The slice ends on a lead byte with none of its continuation bytes: drop that
// byte once and look at the byte before it; if a rollback already happened,
// return the bytes before it through the invalid-UTF-8 fallback.
// first byte of a 2 byte encoding
0xC0...0xDF => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// first byte of a 3 byte encoding
0xE0...0xEF => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// first byte of a 4 byte encoding
0xF0...0xF7 => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// invalid utf-8
else => {
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
},
}
}
// Reached when the backwards scan stepped past index 0 without returning.
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
}
/// Converts `input` to a `bun.String` as UTF-8 after removing any trailing
/// "\n" and "\r" bytes.
fn toBunStringMaxCharsBestEffortForShellImpl(input: []const u8) bun.String {
    const without_trailing_newlines = std.mem.trimRight(u8, input, "\n\r");
    return toBunStringComptime(without_trailing_newlines, .utf8);
}
/// Fallback used when the tail of `input` does not look like valid UTF-8:
/// hands the bytes to the UTF-8 conversion as they are, without trimming.
fn toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input: []const u8) bun.String {
    const converted = toBunStringComptime(input, .utf8);
    return converted;
}
pub fn toBunString(input: []const u8, encoding: JSC.Node.Encoding) bun.String {
return switch (encoding) {
inline else => |enc| toBunStringComptime(input, enc),
@@ -531,6 +628,39 @@ pub const Encoder = struct {
}
};
extern fn jsBufferGetBytes(*JSC.JSGlobalObject, JSValue, len_out: *u32, failed: *bool) [*]const u8;
/// Creates the native function the shell's JS builtins call as
/// `BufferToBunStringMaxCharsBestEffort(buffer, maxBytes?)`.
///
/// The created function:
/// - throws if called without arguments or if the first argument is not a Buffer;
/// - keeps at most `maxBytes` leading bytes of the buffer (256 when `maxBytes`
///   is missing, not a number, or not positive);
/// - returns those bytes as a JS string via
///   `Encoder.toBunStringMaxCharsBestEffortForShell`.
///
/// Despite the name, the limit is applied to bytes, not characters.
pub fn BufferToBunStringMaxCharsBestEffort(g: *JSC.JSGlobalObject) bun.JSError!JSC.JSValue {
    return JSC.JSFunction.create(
        g,
        "BufferToBunStringMaxCharsBestEffort",
        struct {
            /// Limit applied when the caller does not pass a usable `maxBytes`.
            const default_max_bytes: usize = 256;

            fn impl(global: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
                const nargs = callframe.argumentsCount();
                if (nargs < 1) {
                    // Only the buffer is required; the limit is optional.
                    return global.throwNotEnoughArguments("BufferToBunStringMaxCharsBestEffort", 1, nargs);
                }

                const buffer = callframe.argument(0);
                if (!buffer.isBuffer(global)) {
                    return global.throwTypeError("first argument must be a buffer", .{});
                }
                const array_buffer = buffer.asArrayBuffer(global) orelse {
                    return global.throwTypeError("first argument must be a buffer", .{});
                };

                // Honor the limit the JS callers pass as the second argument
                // instead of silently truncating everything to 256 bytes.
                const max_bytes: usize = blk: {
                    const requested = callframe.argument(1);
                    if (!requested.isNumber()) break :blk default_max_bytes;
                    const n = requested.toInt32();
                    break :blk if (n > 0) @as(usize, @intCast(n)) else default_max_bytes;
                };

                const bytes = array_buffer.byteSlice();
                const prefix = bytes[0..@min(bytes.len, max_bytes)];
                return Encoder.toBunStringMaxCharsBestEffortForShell(prefix).toJS(global);
            }
        }.impl,
        1,
        .{},
    );
}
comptime {
_ = &TextEncoder.TextEncoder__encode8;
_ = &TextEncoder.TextEncoder__encode16;

View File

@@ -5,9 +5,20 @@ type ParsedShellScript = any;
type Resolve = (value: ShellOutput) => void;
export function createBunShellTemplateFunction(createShellInterpreter, createParsedShellScript) {
// Small amount for the part that goes `ShellError: ...`
const MAX_MESSAGE_BYTES = 256;
const MAX_INFO_BYTES = 4096;
const BufferToBunStringMaxCharsBestEffort = $zig("encoding.zig", "BufferToBunStringMaxCharsBestEffort");
/**
 * Renders the receiving Buffer as text. Buffers larger than MAX_INFO_BYTES are
 * not decoded in full: a `<truncated from N bytes>` prefix is followed by the
 * string the native helper returns for `(this, MAX_INFO_BYTES)`.
 */
function lazyBufferToHumanReadableString(this: Buffer) {
  const totalBytes = this.byteLength;
  if (totalBytes > MAX_INFO_BYTES) {
    const preview = BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES);
    return `<truncated from ${totalBytes} bytes> ${preview}`;
  }
  return this.toString();
}
/**
 * Custom inspect hook for a Buffer: shows the contents as an inspected string.
 * Buffers larger than MAX_INFO_BYTES are shown as a `<truncated from N bytes>`
 * prefix followed by the inspected string the native helper returns for
 * `(this, MAX_INFO_BYTES)`.
 */
function bufferInspect(this: Buffer) {
  const totalBytes = this.byteLength;
  if (totalBytes > MAX_INFO_BYTES) {
    const preview = BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES);
    return `<truncated from ${totalBytes} bytes> ${Bun.inspect(preview)}`;
  }
  const text = this.toString();
  return Bun.inspect(text);
}
class ShellError extends Error {
#output?: ShellOutput = undefined;
@@ -21,7 +32,15 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar
}
initialize(output: ShellOutput, code: number) {
this.message = `Failed with exit code ${code}`;
// dummy private symbol so we can check if the error is a shell error
this.$napiDlopenHandle = true;
let msg: string =
output.stderr.byteLength > MAX_MESSAGE_BYTES
? `<truncated from ${output.stderr.byteLength} bytes> ${BufferToBunStringMaxCharsBestEffort(output.stderr, MAX_MESSAGE_BYTES)}`
: output.stderr.toString().trimEnd();
this.message = msg;
this.#output = output;
this.name = "ShellError";
@@ -44,6 +63,9 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar
this.stdout = output.stdout;
this.stderr = output.stderr;
this.exitCode = code;
this.stdout[Bun.inspect.custom] = bufferInspect;
this.stderr[Bun.inspect.custom] = bufferInspect;
}
text(encoding) {

View File

@@ -665,6 +665,10 @@ pub const ParsedShellScript = struct {
return .undefined;
}
/// Returns this script's `quiet` flag as a JS boolean. The global object and
/// call frame are unused.
pub fn getQuiet(this: *ParsedShellScript, _: *JSGlobalObject, _: *JSC.CallFrame) bun.JSError!JSC.JSValue {
    const is_quiet = this.quiet;
    return JSC.JSValue.jsBoolean(is_quiet);
}
pub fn setEnv(this: *ParsedShellScript, globalThis: *JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
const value1 = callframe.argument(0).getObject() orelse {
return globalThis.throwInvalidArguments("env must be an object", .{});

View File

@@ -45,6 +45,11 @@ pub const STDERR_FD: bun.FileDescriptor = if (bun.Environment.isWindows) bun.FDI
pub const POSIX_DEV_NULL: [:0]const u8 = "/dev/null";
pub const WINDOWS_DEV_NULL: [:0]const u8 = "NUL";
pub extern fn ShellError__isShellError(globalObject: *JSGlobalObject, value: *JSC.JSObject) bool;
/// Zig-facing wrapper around the native `ShellError__isShellError` check.
/// The native side warns that `value` must already be known to come from an
/// error instance before this is called.
pub fn isShellError(globalObject: *JSC.JSGlobalObject, value: *JSC.JSObject) bool {
    const has_marker = ShellError__isShellError(globalObject, value);
    return has_marker;
}
/// The strings in this type are allocated with event loop ctx allocator
pub const ShellErr = union(enum) {
sys: JSC.SystemError,
@@ -2381,9 +2386,9 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
const whitespace_preceding =
if (self.chars.prev) |prev|
Chars.isWhitespace(prev)
else
true;
Chars.isWhitespace(prev)
else
true;
if (!whitespace_preceding) break :escaped;
try self.break_word(true);
self.eatComment();
@@ -2744,10 +2749,10 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
) {
const tok: Token =
switch (self.chars.state) {
.Normal => @unionInit(Token, "Text", .{ .start = start, .end = end }),
.Single => @unionInit(Token, "SingleQuotedText", .{ .start = start, .end = end }),
.Double => @unionInit(Token, "DoubleQuotedText", .{ .start = start, .end = end }),
};
.Normal => @unionInit(Token, "Text", .{ .start = start, .end = end }),
.Single => @unionInit(Token, "SingleQuotedText", .{ .start = start, .end = end }),
.Double => @unionInit(Token, "DoubleQuotedText", .{ .start = start, .end = end }),
};
try self.tokens.append(tok);
if (add_delimiter) {
try self.tokens.append(.Delimit);
@@ -2755,39 +2760,40 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
} else if ((in_normal_space or in_operator) and self.tokens.items.len > 0 and
// whether or not to add a delimiter token
switch (self.tokens.items[self.tokens.items.len - 1]) {
.Var,
.VarArgv,
.Text,
.SingleQuotedText,
.DoubleQuotedText,
.BraceBegin,
.Comma,
.BraceEnd,
.CmdSubstEnd,
.Asterisk,
=> true,
.Var,
.VarArgv,
.Text,
.SingleQuotedText,
.DoubleQuotedText,
.BraceBegin,
.Comma,
.BraceEnd,
.CmdSubstEnd,
.Asterisk,
=> true,
.Pipe,
.DoublePipe,
.Ampersand,
.DoubleAmpersand,
.Redirect,
.Dollar,
.DoubleAsterisk,
.Eq,
.Semicolon,
.Newline,
.CmdSubstBegin,
.CmdSubstQuoted,
.OpenParen,
.CloseParen,
.JSObjRef,
.DoubleBracketOpen,
.DoubleBracketClose,
.Delimit,
.Eof,
=> false,
}) {
.Pipe,
.DoublePipe,
.Ampersand,
.DoubleAmpersand,
.Redirect,
.Dollar,
.DoubleAsterisk,
.Eq,
.Semicolon,
.Newline,
.CmdSubstBegin,
.CmdSubstQuoted,
.OpenParen,
.CloseParen,
.JSObjRef,
.DoubleBracketOpen,
.DoubleBracketClose,
.Delimit,
.Eof,
=> false,
})
{
try self.tokens.append(.Delimit);
self.delimit_quote = false;
}

View File

@@ -0,0 +1,90 @@
export type Kind =
| "ascii-at-end"
| "2-byte-sequence-at-end"
| "3-byte-sequence-at-end"
| "4-byte-sequence-at-end"
| "continuation-byte-at-end"
| "no-over-rollback-3byte"
| "no-over-rollback-4byte"
| "trim-newlines"
| "utf-8-in-the-middle"
| "random";
// Fixture for the shell-error tests: writes a 512-byte payload selected by
// `process.argv[2]` to stderr and exits with code 1. The tests expect the
// error message to keep only the first 256 bytes, so the interesting bytes
// sit at indices 252..255.

const PAYLOAD_SIZE = 512;
const ASCII_A = 0x61;

/** Returns a PAYLOAD_SIZE-byte array of "a" with `bytes` written starting at `offset`. */
function asciiPayload(offset = 0, bytes: readonly number[] = []): Uint8Array {
  const payload = new Uint8Array(PAYLOAD_SIZE).fill(ASCII_A);
  payload.set(bytes, offset);
  return payload;
}

/**
 * Builds the stderr payload for `kind`.
 * @throws Error("Invalid kind") when `kind` is not one of the known kinds.
 */
function buildPayload(kind: Kind): Uint8Array {
  switch (kind) {
    case "ascii-at-end":
      return asciiPayload();
    case "2-byte-sequence-at-end":
      // £
      return asciiPayload(254, [0xc2, 0xa3]);
    case "3-byte-sequence-at-end":
      // ⛄
      return asciiPayload(253, [0xe2, 0x9b, 0x84]);
    case "4-byte-sequence-at-end":
      // 𒀖
      return asciiPayload(252, [0xf0, 0x92, 0x80, 0x96]);
    case "continuation-byte-at-end":
      // 3 byte sequence, but only 1 continuation byte
      return asciiPayload(254, [0xe0, 0x80]);
    case "no-over-rollback-3byte":
      // two 3 byte sequences in a row, each with only 1 continuation byte
      return asciiPayload(252, [0xe0, 0x80, 0xe0, 0x80]);
    case "no-over-rollback-4byte":
      // two 4 byte lead bytes followed by only 2 continuation bytes
      return asciiPayload(252, [0xf0, 0xf0, 0x80, 0x80]);
    case "random": {
      const payload = new Uint8Array(PAYLOAD_SIZE);
      for (let i = 0; i < payload.length; i++) {
        // A zero byte is replaced with "a".
        payload[i] = Math.floor(Math.random() * 256) || ASCII_A;
      }
      return payload;
    }
    case "trim-newlines":
      // three newlines followed by a lone 2 byte lead byte
      return asciiPayload(252, [10, 10, 10, 0xc0]);
    case "utf-8-in-the-middle": {
      const payload = new Uint8Array(PAYLOAD_SIZE);
      for (let i = 0; i < payload.length; i += 2) {
        // £
        payload.set([0xc2, 0xa3], i);
      }
      // 4 byte lead byte with a single continuation byte
      payload.set([0xf0, 0x80], 254);
      return payload;
    }
    default:
      throw new Error("Invalid kind");
  }
}

// argv is untyped input; unknown values are rejected by buildPayload's default branch.
const kind = process.argv[2] as Kind;
const array = buildPayload(kind);

// Exit only after the write has been handed off: where stderr writes are
// asynchronous, calling process.exit() right after write() can drop output.
// exitCode is set first so the code is 1 even if the process ends on its own.
process.exitCode = 1;
process.stderr.write(array, () => process.exit(1));

View File

@@ -0,0 +1,38 @@
import { $, type ShellError } from "bun";
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
import { join } from "path";
import type { Kind } from "./fixtures/error";
// Runs the `fixtures/error` script once per kind and checks that the
// ShellError thrown for the failing command has exit code 1 and the expected
// (truncated) message.
describe("shell-error", () => {
  const fixture = join(__dirname, "fixtures", "error");

  // Each entry pairs a fixture kind with either the exact expected
  // `err.message` or a predicate that must return true for it.
  const kinds: [kind: Kind, expected: string | ((s: string) => boolean)][] = [
    ["ascii-at-end", "<truncated from 512 bytes> " + "a".repeat(256)],
    ["2-byte-sequence-at-end", "<truncated from 512 bytes> " + "a".repeat(254) + "£"],
    ["3-byte-sequence-at-end", "<truncated from 512 bytes> " + "a".repeat(253) + "⛄"],
    ["4-byte-sequence-at-end", "<truncated from 512 bytes> " + "a".repeat(252) + "𒀖"],
    ["continuation-byte-at-end", "<truncated from 512 bytes> " + "a".repeat(254)],
    ["random", (s: string) => s.startsWith("<truncated from 512 bytes> ")],
    ["no-over-rollback-3byte", "<truncated from 512 bytes> " + "a".repeat(252)],
    ["no-over-rollback-4byte", "<truncated from 512 bytes> " + "a".repeat(252)],
    ["trim-newlines", "<truncated from 512 bytes> " + "a".repeat(252)],
    ["utf-8-in-the-middle", "<truncated from 512 bytes> " + "£".repeat(127)],
  ];

  for (const [kind, expected] of kinds) {
    test(kind, async () => {
      try {
        await $`bun ${fixture} ${kind}`.throws(true).quiet();
      } catch (err_) {
        const err = err_ as ShellError;
        expect(err.exitCode).toBe(1);
        if (typeof expected === "function") {
          // The predicate's result has to be asserted; calling it alone checks nothing.
          expect(expected(err.message)).toBe(true);
        } else {
          expect(err.message).toEqual(expected);
        }
        return;
      }
      // The fixture always exits with code 1, so the command must have thrown.
      expect.unreachable();
    });
  }
});