From 69f1331f7b591ea543aa8cdd05f894e5ef4fe34a Mon Sep 17 00:00:00 2001 From: Zack Radisic <56137411+zackradisic@users.noreply.github.com> Date: Wed, 2 Apr 2025 17:22:34 -0700 Subject: [PATCH] wip --- src/bun.js/bindings/BunObject.cpp | 31 +------ src/bun.js/bindings/JSBuffer.cpp | 23 +++++ src/bun.js/webcore/encoding.zig | 134 ++++++++++++++++++++++++++++ src/js/builtins/shell.ts | 26 ++++-- test/js/bun/shell/fixtures/error.ts | 72 +++++++++++++++ test/js/bun/shell/shellerr.test.ts | 36 ++++++++ 6 files changed, 288 insertions(+), 34 deletions(-) create mode 100644 test/js/bun/shell/fixtures/error.ts create mode 100644 test/js/bun/shell/shellerr.test.ts diff --git a/src/bun.js/bindings/BunObject.cpp b/src/bun.js/bindings/BunObject.cpp index 3536fa207d..8334b550ba 100644 --- a/src/bun.js/bindings/BunObject.cpp +++ b/src/bun.js/bindings/BunObject.cpp @@ -337,37 +337,10 @@ JSValue constructBunFetchObject(VM& vm, JSObject* bunObject) return fetchFn; } +/// WARNING: you must check that the JSObject* comes from an error instance extern "C" bool ShellError__isShellError(JSGlobalObject* globalObject, JSC::JSObject* jsObject) { - static JSC::JSObject* cachedShellErrorConstructor(nullptr); - - // Get the ShellError constructor from cache or look it up - JSC::JSObject* shellErrorConstructor = cachedShellErrorConstructor; - if (!shellErrorConstructor) { - // Get the Bun object from the global object - JSC::JSValue bunObject = globalObject->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "Bun"_s)); - if (!bunObject.isObject()) - return false; - - // Get the shell module from the Bun object - JSC::JSValue shellModule = bunObject.getObject()->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "$"_s)); - if (!shellModule.isObject()) - return false; - - // Get the ShellError constructor from the shell module - JSC::JSValue shellErrorValue = shellModule.getObject()->get(globalObject, JSC::Identifier::fromString(globalObject->vm(), "ShellError"_s)); - if (!shellErrorValue.isObject()) - return false; - - shellErrorConstructor = shellErrorValue.getObject(); - cachedShellErrorConstructor = shellErrorConstructor; - } - - // Get the constructor property from the object - JSC::JSValue constructor = jsObject->get(globalObject, globalObject->vm().propertyNames->constructor); - - // Compare with the ShellError constructor - return constructor.isObject() && constructor.getObject() == shellErrorConstructor; + return jsObject->hasProperty(globalObject, WebCore::builtinNames(globalObject->vm()).napiDlopenHandlePrivateName()); } static JSValue diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 56d7cc035f..3f2fbc7618 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1865,6 +1865,29 @@ bool inline parseArrayIndex(JSC::ThrowScope& scope, JSC::JSGlobalObject* globalO return true; } +extern "C" const unsigned char* jsBufferGetBytes(JSC::JSGlobalObject* globalObject, JSC::EncodedJSValue value, uint32_t* lenOut, bool* failed) +{ + + auto& vm = JSC::getVM(globalObject); + auto throwScope = DECLARE_THROW_SCOPE(vm); + auto jsvalue = JSValue::decode(value); + if (jsvalue.isUndefinedOrNull()) { + throwTypeError(globalObject, throwScope, "Cannot convert undefined or null to object"_s); + *failed = true; + return nullptr; + } + auto thisObject = JSC::jsDynamicCast(jsvalue); + if (UNLIKELY(!thisObject)) { + throwTypeError(globalObject, throwScope, "Not a JSBuffer"_s); + *failed = true; + return nullptr; + } + + *lenOut = thisObject->byteLength(); + + return thisObject->span().data(); +} + // https://github.com/nodejs/node/blob/v22.9.0/lib/buffer.js#L834 // using byteLength and byte offsets here is intentional static JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation::ClassParameter castedThis) diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 600b704fc4..a864c5c95d 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -217,6 +217,103 @@ pub const Encoder = struct { return bun_string.transferToJS(global); } + /// Used internally for the shell, assumes utf-8 strings + /// + /// If the stdout/stderr Buffer is really long, we don't want to show the + /// entire string to the user so we'll display max 256 chars + /// + /// But we don't want to call `stderr.toString().substring(0, 256)` because + /// that will decode the entire buffer (and it could be huge) + /// + /// So this function takes as `input` a truncated byte array (e.g. bufferBytes[0 .. 256]) + /// + /// By truncating the byte array, we may accidentally cut off some bytes in a multi-byte char + /// encoding. So we'll skip those so we don't print out invalid UTF-8 + /// + /// In the case that it's not valid utf-8 we just fallback + pub fn toBunStringMaxCharsBestEffortForShell(input: []const u8) bun.String { + if (input.len == 0) return bun.String.empty; + bun.assert(input.len > 0); // don't call this function with an empty string + var last_index: i64 = @intCast(input.len - 1); + var allow_rollback = true; + outer_loop: while (last_index >= 0) { + switch (input[@bitCast(last_index)]) { + // ending is ascii and so it's good + 0x0...0x7F => return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]), + // we hit a continuation byte, there are two cases: + // 1. Valid string -> we hit the last continuation byte in a multi-byte sequence + // 2. Invalid string -> we accidentally cut off some continuation bytes + 0x80...0xBF => { + var continuation_bytes_count: u32 = 1; + var j = last_index - 1; + while (j >= 0) { + if (continuation_bytes_count > 3) return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input); + switch (input[@bitCast(j)]) { + 0x80...0xBF => { + continuation_bytes_count += 1; + j -= 1; + }, + 0xC0...0xDF => { + if (continuation_bytes_count == 1) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]); + if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]); + allow_rollback = false; + continue :outer_loop; + }, + 0xE0...0xEF => { + if (continuation_bytes_count == 2) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]); + if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]); + allow_rollback = false; + continue :outer_loop; + }, + 0xF0...0xF7 => { + if (continuation_bytes_count == 3) return toBunStringMaxCharsBestEffortForShellImpl(input[0..@bitCast(last_index + 1)]); + if (allow_rollback) last_index = j - 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(j)]); + allow_rollback = false; + continue :outer_loop; + }, + // invalid utf-8 + else => { + return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input); + }, + } + } + // didn't find the starting byte and looked through the whole string, + // means invalid + return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input); + }, + // first byte of a 2 byte encoding + 0xC0...0xDF => { + if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]); + allow_rollback = false; + }, + // first byte of a 3 byte encoding + 0xE0...0xEF => { + if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]); + allow_rollback = false; + }, + // first byte of a 4 byte encoding + 0xF0...0xF7 => { + if (allow_rollback) last_index -= 1 else return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]); + allow_rollback = false; + }, + // invalid utf-8 + else => { + return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input[0..@bitCast(last_index)]); + }, + } + } + + return toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input); + } + + fn toBunStringMaxCharsBestEffortForShellImpl(input: []const u8) bun.String { + return toBunStringComptime(std.mem.trimRight(u8, input, "\n\r"), .utf8); + } + + fn toBunStringMaxCharsBestEffortForShellHandleInvalidUTF8(input: []const u8) bun.String { + return toBunStringComptime(input, .utf8); + } + pub fn toBunString(input: []const u8, encoding: JSC.Node.Encoding) bun.String { return switch (encoding) { inline else => |enc| toBunStringComptime(input, enc), @@ -531,6 +628,43 @@ pub const Encoder = struct { } }; +extern fn jsBufferGetBytes(*JSC.JSGlobalObject, JSValue, len_out: *u32, failed: *bool) [*]const u8; + +pub fn BufferToBunStringMaxCharsBestEffort(g: *JSC.JSGlobalObject) bun.JSError!JSC.JSValue { + return JSC.JSFunction.create( + g, + "BufferToBunStringMaxCharsBestEffort", + struct { + fn impl(global: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue { + const nargs = callframe.argumentsCount(); + if (nargs < 1) { + return global.throwNotEnoughArguments("BufferToBunStringMaxCharsBestEffort", 2, callframe.argumentsCount()); + } + + const buffer = callframe.argument(0); + const max_chars = 256; + + if (!buffer.isBuffer(global)) { + return global.throwTypeError("first argument must be a buffer", .{}); + } + + var len: u32 = 0; + var failed = false; + const maybe_ptr: ?[*]const u8 = jsBufferGetBytes(global, buffer, &len, &failed); + if (failed) { + return bun.JSError.JSError; + } + const ptr = maybe_ptr orelse + return bun.String.empty.toJS(global); + const input = ptr[0..@min(len, max_chars)]; + return Encoder.toBunStringMaxCharsBestEffortForShell(input).toJS(global); + } + }.impl, + 1, + .{}, + ); +} + comptime { _ = &TextEncoder.TextEncoder__encode8; _ = &TextEncoder.TextEncoder__encode16; diff --git a/src/js/builtins/shell.ts b/src/js/builtins/shell.ts index 087ad37e07..feae1bc088 100644 --- a/src/js/builtins/shell.ts +++ b/src/js/builtins/shell.ts @@ -5,11 +5,19 @@ type ParsedShellScript = any; type Resolve = (value: ShellOutput) => void; export function createBunShellTemplateFunction(createShellInterpreter, createParsedShellScript) { + // Small amount for the part that goes `ShellError: ...` + const MAX_MESSAGE_BYTES = 256; + const MAX_INFO_BYTES = 4096; + const BufferToBunStringMaxCharsBestEffort = $zig("encoding.zig", "BufferToBunStringMaxCharsBestEffort"); function lazyBufferToHumanReadableString(this: Buffer) { + if (this.byteLength > MAX_INFO_BYTES) + return ` ${BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES)}`; return this.toString(); } function bufferInspect(this: Buffer) { - return require("node:util").inspect(this.toString()); + if (this.byteLength > MAX_INFO_BYTES) + return ` ${Bun.inspect(BufferToBunStringMaxCharsBestEffort(this, MAX_INFO_BYTES))}`; + return Bun.inspect(this.toString()); } class ShellError extends Error { @@ -24,8 +32,15 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar } initialize(output: ShellOutput, code: number) { - const stderrString = output.stderr.toString(); - this.message = stderrString; + // dummy private symbol so we can check if the error is a shell error + this.$napiDlopenHandle = true; + + let msg: string = + output.stderr.byteLength > MAX_MESSAGE_BYTES + ? ` ${BufferToBunStringMaxCharsBestEffort(output.stderr, MAX_MESSAGE_BYTES)}` + : output.stderr.toString().trimEnd(); + + this.message = msg; this.#output = output; this.name = "ShellError"; @@ -34,7 +49,7 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar Object.defineProperty(this, "info", { value: { exitCode: code, - stderr: stderrString, + stderr: output.stderr, stdout: output.stdout, }, writable: true, @@ -43,11 +58,12 @@ export function createBunShellTemplateFunction(createShellInterpreter, createPar }); this.info.stdout.toJSON = lazyBufferToHumanReadableString; - // this.info.stderr.toJSON = lazyBufferToHumanReadableString; + this.info.stderr.toJSON = lazyBufferToHumanReadableString; this.stdout = output.stdout; this.stderr = output.stderr; this.exitCode = code; + this.stdout[Bun.inspect.custom] = bufferInspect; this.stderr[Bun.inspect.custom] = bufferInspect; } diff --git a/test/js/bun/shell/fixtures/error.ts b/test/js/bun/shell/fixtures/error.ts new file mode 100644 index 0000000000..1f5a88abc0 --- /dev/null +++ b/test/js/bun/shell/fixtures/error.ts @@ -0,0 +1,72 @@ +export type Kind = + | "ascii-at-end" + | "2-byte-sequence-at-end" + | "3-byte-sequence-at-end" + | "4-byte-sequence-at-end" + | "continuation-byte-at-end" + | "no-over-rollback-3byte" + | "no-over-rollback-4byte" + | "random"; + +const kind: Kind = process.argv[2]; + +let array: Uint8Array; +if (kind === "ascii-at-end") { + array = new Uint8Array(512); + array.fill(97); +} else if (kind === "2-byte-sequence-at-end") { + array = new Uint8Array(512); + array.fill(97); + // £ + array[254] = 0xc2; + array[255] = 0xa3; +} else if (kind === "3-byte-sequence-at-end") { + array = new Uint8Array(512); + array.fill(97); + // ⛄ + array[253] = 0xe2; + array[254] = 0x9b; + array[255] = 0x84; +} else if (kind === "4-byte-sequence-at-end") { + array = new Uint8Array(512); + array.fill(97); + // 𒀖 + array[252] = 0xf0; + array[253] = 0x92; + array[254] = 0x80; + array[255] = 0x96; +} else if (kind === "continuation-byte-at-end") { + array = new Uint8Array(512); + array.fill(97); + // 3 byte sequence, but only 1 continuation byte + array[254] = 0xe0; + array[255] = 0x80; +} else if (kind === "no-over-rollback-3byte") { + array = new Uint8Array(512); + array.fill(97); + // 3 byte sequence, but only 1 continuation byte + array[252] = 0xe0; + array[253] = 0x80; + array[254] = 0xe0; + array[255] = 0x80; +} else if (kind === "no-over-rollback-4byte") { + array = new Uint8Array(512); + array.fill(97); + array[252] = 0xf0; + array[253] = 0xf0; + array[254] = 0x80; + array[255] = 0x80; +} else if (kind === "random") { + array = new Uint8Array(512); + for (let i = 0; i < array.length; i++) { + array[i] = Math.floor(Math.random() * 256); + if (array[i] === 0) { + array[i] = 0x61; + } + } +} else { + throw new Error("Invalid kind"); +} + +process.stderr.write(array); +process.exit(1); diff --git a/test/js/bun/shell/shellerr.test.ts b/test/js/bun/shell/shellerr.test.ts new file mode 100644 index 0000000000..446cde50e0 --- /dev/null +++ b/test/js/bun/shell/shellerr.test.ts @@ -0,0 +1,36 @@ +import { $, type ShellError } from "bun"; +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; +import { join } from "path"; +import type { Kind } from "./fixtures/error"; + +describe("shell-error", () => { + const fixture = join(__dirname, "fixtures", "error"); + const kinds: [kind: Kind, expected: string | ((s: string) => void)][] = [ + ["ascii-at-end", " " + "a".repeat(256)], + ["2-byte-sequence-at-end", " " + "a".repeat(254) + "£"], + ["3-byte-sequence-at-end", " " + "a".repeat(253) + "⛄"], + ["4-byte-sequence-at-end", " " + "a".repeat(252) + "𒀖"], + ["continuation-byte-at-end", " " + "a".repeat(254)], + ["random", (s: string) => s.startsWith(" ")], + ["no-over-rollback-3byte", " " + "a".repeat(252)], + ["no-over-rollback-4byte", " " + "a".repeat(252)], + ]; + + for (const [kind, expected] of kinds) { + test(kind, async () => { + try { + await $`bun ${fixture} ${kind}`.throws(true).quiet(); + } catch (err_) { + let err = err_ as ShellError; + expect(err.exitCode).toBe(1); + if (typeof expected === "function") { + expected(err.message); + } else { + expect(err.message).toEqual(expected); + } + return; + } + expect.unreachable(); + }); + } +});