This commit is contained in:
Zack Radisic
2025-04-02 17:22:34 -07:00
parent d068b1c0b7
commit 69f1331f7b
6 changed files with 288 additions and 34 deletions

View File

@@ -337,37 +337,10 @@ JSValue constructBunFetchObject(VM& vm, JSObject* bunObject)
return fetchFn;
}
/// WARNING: you must check that the JSObject* comes from an error instance
extern "C" bool ShellError__isShellError(JSGlobalObject* globalObject, JSC::JSObject* jsObject)
{
static JSC::JSObject* cachedShellErrorConstructor(nullptr);
// Get the ShellError constructor from cache or look it up
JSC::JSObject* shellErrorConstructor = cachedShellErrorConstructor;
if (!shellErrorConstructor) {
// Get the Bun object from the global object
JSC::JSValue bunObject = globalObject->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "Bun"_s));
if (!bunObject.isObject())
return false;
// Get the shell module from the Bun object
JSC::JSValue shellModule = bunObject.getObject()->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "$"_s));
if (!shellModule.isObject())
return false;
// Get the ShellError constructor from the shell module
JSC::JSValue shellErrorValue = shellModule.getObject()->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "ShellError"_s));
if (!shellErrorValue.isObject())
return false;
shellErrorConstructor = shellErrorValue.getObject();
cachedShellErrorConstructor = shellErrorConstructor;
}
// Get the constructor property from the object
JSC::JSValue constructor = jsObject->get(globalObject, globalObject->vm().propertyNames->constructor);
// Compare with the ShellError constructor
return constructor.isObject() && constructor.getObject() == shellErrorConstructor;
return jsObject->hasProperty(globalObject, WebCore::builtinNames(globalObject->vm()).napiDlopenHandlePrivateName());
}
static JSValue

View File

@@ -1865,6 +1865,29 @@ bool inline parseArrayIndex(JSC::ThrowScope& scope, JSC::JSGlobalObject* globalO
return true;
}
extern "C" const unsigned char* jsBufferGetBytes(JSC::JSGlobalObject* globalObject, JSC::EncodedJSValue value, uint32_t* lenOut, bool* failed)
{
auto& vm = JSC::getVM(globalObject);
auto throwScope = DECLARE_THROW_SCOPE(vm);
auto jsvalue = JSValue::decode(value);
if (jsvalue.isUndefinedOrNull()) {
throwTypeError(globalObject, throwScope, "Cannot convert undefined or null to object"_s);
*failed = true;
return nullptr;
}
auto thisObject = JSC::jsDynamicCast<JSC::JSUint8Array*>(jsvalue);
if (UNLIKELY(!thisObject)) {
throwTypeError(globalObject, throwScope, "Not a JSBuffer"_s);
*failed = true;
return nullptr;
}
*lenOut = thisObject->byteLength();
return thisObject->span().data();
}
// https://github.com/nodejs/node/blob/v22.9.0/lib/buffer.js#L834
// using byteLength and byte offsets here is intentional
static JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSArrayBufferView>::ClassParameter castedThis)

View File

@@ -217,6 +217,103 @@ pub const Encoder = struct {
return bun_string.transferToJS(global);
}
/// Used internally for the shell, assumes utf-8 strings
///
/// If the stdout/stderr Buffer is really long, we don't want to show the
/// entire string to the user so we'll display max 256 chars
///
/// But we don't want to call `stderr.toString().substring(0, 256)` because
/// that will decode the entire buffer (and it could be huge)
///
/// So this function takes as `input` a truncated byte array (e.g. bufferBytes[0 .. 256])
///
/// By truncating the byte array, we may accidentally cut off some bytes in a multi-byte char
/// encoding. So we'll skip those so we don't print out invalid UTF-8
///
/// In the case that it's not valid utf-8 we just fallback
pub fn toBunStringMaxCharsBestEffortForShell(input: []const u8) bun.String {
if (input.len == 0) return bun.String.empty;
bun.assert(input.len > 0); // don't call this function with an empty string
var last_index: i64 = @intCast(input.len - 1);
var allow_rollback = true;
outer_loop: while (last_index >= 0) {
switch (input[@bitCast(last_index)]) {
// ending is ascii and so it's good
0x0...0x7F => return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]),
// we hit a continuation byte, there are two cases:
// 1. Valid string -> we hit the last continuation byte in a multi-byte sequence
// 2. Invalid string -> we accidentally cut off some continuation bytes
0x80...0xBF => {
var continuation_bytes_count: u32 = 1;
var j = last_index - 1;
while (j >= 0) {
if (continuation_bytes_count > 3) return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
switch (input[@bitCast(j)]) {
0x80...0xBF => {
continuation_bytes_count += 1;
j -= 1;
},
0xC0...0xDF => {
if (continuation_bytes_count == 1) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
0xE0...0xEF => {
if (continuation_bytes_count == 2) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
0xF0...0xF7 => {
if (continuation_bytes_count == 3) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]);
if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]);
allow_rollback = false;
continue :outer_loop;
},
// invalid utf-8
else => {
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
},
}
}
// didn't find the starting byte and looked through the whole string,
// means invalid
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
},
// first byte of a 2 byte encoding
0xC0...0xDF => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// first byte of a 3 byte encoding
0xE0...0xEF => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// first byte of a 4 byte encoding
0xF0...0xF7 => {
if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
allow_rollback = false;
},
// invalid utf-8
else => {
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]);
},
}
}
return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input);
}
fn toBunStringMaxCharsBestEffortForShellImpl(input: []const u8) bun.String {
return toBunStringComptime(std.mem.trimRight(u8, input, "\n\r"), .utf8);
}
fn toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input: []const u8) bun.String {
return toBunStringComptime(input, .utf8);
}
pub fn toBunString(input: []const u8, encoding: JSC.Node.Encoding) bun.String {
return switch (encoding) {
inline else => |enc| toBunStringComptime(input, enc),
@@ -531,6 +628,43 @@ pub const Encoder = struct {
}
};
extern fn jsBufferGetBytes(*JSC.JSGlobalObject, JSValue, len_out: *u32, failed: *bool) [*]const u8;
pub fn BufferToBunStringMaxCharsBestEffort(g: *JSC.JSGlobalObject) bun.JSError!JSC.JSValue {
return JSC.JSFunction.create(
g,
"BufferToBunStringMaxCharsBestEffort",
struct {
fn impl(global: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
const nargs = callframe.argumentsCount();
if (nargs < 1) {
return global.throwNotEnoughArguments("BufferToBunStringMaxCharsBestEffort", 2, callframe.argumentsCount());
}
const buffer = callframe.argument(0);
const max_chars = 256;
if (!buffer.isBuffer(global)) {
return global.throwTypeError("first argument must be a buffer", .{});
}
var len: u32 = 0;
var failed = false;
const maybe_ptr: ?[*]const u8 = jsBufferGetBytes(global, buffer, &len, &failed);
if (failed) {
return bun.JSError.JSError;
}
const ptr = maybe_ptr orelse
return bun.String.empty.toJS(global);
const input = ptr[0..@min(len, max_chars)];
return Encoder.toBunStringMaxCharsBestEffortForShell(input).toJS(global);
}
}.impl,
1,
.{},
);
}
comptime {
_ = &TextEncoder.TextEncoder__encode8;
_ = &TextEncoder.TextEncoder__encode16;

View File

@@ -5,11 +5,19 @@ type ParsedShellScript = any;
type Resolve = (value: ShellOutput) => void;
export function createBunShellTemplateFunction(createShellInterpreter, createParsedShellScript) {
// Small amount for the part that goes `ShellError: ...`
const MAX_MESSAGE_BYTES = 256;
const MAX_INFO_BYTES = 4096;
const BufferToBunStringMaxCharsBestEffort = $zig("encoding.zig", "BufferToBunStringMaxCharsBestEffort");
function lazyBufferToHumanReadableString(this: Buffer) {
if (this.byteLength > MAX_INFO_BYTES)
return `<truncated ${this.byteLength} bytes> ${BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES)}`;
return this.toString();
}
function bufferInspect(this: Buffer) {
return require("node:util").inspect(this.toString());
if (this.byteLength > MAX_INFO_BYTES)
return `<truncated ${this.byteLength} bytes> ${Bun.inspect(BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES))}`;
return Bun.inspect(this.toString());
}
class ShellError extends Error {
@@ -24,8 +32,15 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar
}
initialize(output: ShellOutput, code: number) {
const stderrString = output.stderr.toString();
this.message = stderrString;
// dummy private symbol so we can check if the error is a shell error
this.$napiDlopenHandle = true;
let msg: string =
output.stderr.byteLength > MAX_MESSAGE_BYTES
? `<truncated ${output.stderr.byteLength} bytes> ${BufferToBunStringMaxCharsBestEffort(output.stderr, MAX_MESSAGE_BYTES)}`
: output.stderr.toString().trimEnd();
this.message = msg;
this.#output = output;
this.name = "ShellError";
@@ -34,7 +49,7 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar
Object.defineProperty(this, "info", {
value: {
exitCode: code,
stderr: stderrString,
stderr: output.stderr,
stdout: output.stdout,
},
writable: true,
@@ -43,11 +58,12 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar
});
this.info.stdout.toJSON = lazyBufferToHumanReadableString;
// this.info.stderr.toJSON = lazyBufferToHumanReadableString;
this.info.stderr.toJSON = lazyBufferToHumanReadableString;
this.stdout = output.stdout;
this.stderr = output.stderr;
this.exitCode = code;
this.stdout[Bun.inspect.custom] = bufferInspect;
this.stderr[Bun.inspect.custom] = bufferInspect;
}

View File

@@ -0,0 +1,72 @@
export type Kind =
| "ascii-at-end"
| "2-byte-sequence-at-end"
| "3-byte-sequence-at-end"
| "4-byte-sequence-at-end"
| "continuation-byte-at-end"
| "no-over-rollback-3byte"
| "no-over-rollback-4byte"
| "random";
const kind: Kind = process.argv[2];
let array: Uint8Array;
if (kind === "ascii-at-end") {
array = new Uint8Array(512);
array.fill(97);
} else if (kind === "2-byte-sequence-at-end") {
array = new Uint8Array(512);
array.fill(97);
// £
array[254] = 0xc2;
array[255] = 0xa3;
} else if (kind === "3-byte-sequence-at-end") {
array = new Uint8Array(512);
array.fill(97);
// ⛄
array[253] = 0xe2;
array[254] = 0x9b;
array[255] = 0x84;
} else if (kind === "4-byte-sequence-at-end") {
array = new Uint8Array(512);
array.fill(97);
// 𒀖
array[252] = 0xf0;
array[253] = 0x92;
array[254] = 0x80;
array[255] = 0x96;
} else if (kind === "continuation-byte-at-end") {
array = new Uint8Array(512);
array.fill(97);
// 3 byte sequence, but only 1 continuation byte
array[254] = 0xe0;
array[255] = 0x80;
} else if (kind === "no-over-rollback-3byte") {
array = new Uint8Array(512);
array.fill(97);
// 3 byte sequence, but only 1 continuation byte
array[252] = 0xe0;
array[253] = 0x80;
array[254] = 0xe0;
array[255] = 0x80;
} else if (kind === "no-over-rollback-4byte") {
array = new Uint8Array(512);
array.fill(97);
array[252] = 0xf0;
array[253] = 0xf0;
array[254] = 0x80;
array[255] = 0x80;
} else if (kind === "random") {
array = new Uint8Array(512);
for (let i = 0; i < array.length; i++) {
array[i] = Math.floor(Math.random() * 256);
if (array[i] === 0) {
array[i] = 0x61;
}
}
} else {
throw new Error("Invalid kind");
}
process.stderr.write(array);
process.exit(1);

View File

@@ -0,0 +1,36 @@
import { $, type ShellError } from "bun";
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
import { join } from "path";
import type { Kind } from "./fixtures/error";
describe("shell-error", () => {
const fixture = join(__dirname, "fixtures", "error");
const kinds: [kind: Kind, expected: string | ((s: string) => void)][] = [
["ascii-at-end", "<truncated 512 bytes> " + "a".repeat(256)],
["2-byte-sequence-at-end", "<truncated 512 bytes> " + "a".repeat(254) + "£"],
["3-byte-sequence-at-end", "<truncated 512 bytes> " + "a".repeat(253) + "⛄"],
["4-byte-sequence-at-end", "<truncated 512 bytes> " + "a".repeat(252) + "𒀖"],
["continuation-byte-at-end", "<truncated 512 bytes> " + "a".repeat(254)],
["random", (s: string) => s.startsWith("<truncated 512 bytes> ")],
["no-over-rollback-3byte", "<truncated 512 bytes> " + "a".repeat(252)],
["no-over-rollback-4byte", "<truncated 512 bytes> " + "a".repeat(252)],
];
for (const [kind, expected] of kinds) {
test(kind, async () => {
try {
await $`bun ${fixture} ${kind}`.throws(true).quiet();
} catch (err_) {
let err = err_ as ShellError;
expect(err.exitCode).toBe(1);
if (typeof expected === "function") {
expected(err.message);
} else {
expect(err.message).toEqual(expected);
}
return;
}
expect.unreachable();
});
}
});