From 125a2cf920a0e660cf5c4368d06c19bdf1cf27bd Mon Sep 17 00:00:00 2001
From: Don Isaac
Date: Wed, 18 Dec 2024 00:40:59 -0800
Subject: [PATCH] perf(node:util): fast path for `extractedSplitNewLines`

---
 src/bun.js/node/node_util_binding.zig | 57 +++++++++++++++++++++++++++
 src/js/internal/util/inspect.js       | 28 +++++++++++++++--
 2 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/src/bun.js/node/node_util_binding.zig b/src/bun.js/node/node_util_binding.zig
index 2f886cb0fa..f2e4c17559 100644
--- a/src/bun.js/node/node_util_binding.zig
+++ b/src/bun.js/node/node_util_binding.zig
@@ -105,3 +105,60 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
     var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
     return fmtstring.transferToJS(globalThis);
 }
+
+/// Fast path of `extractedSplitNewLines` for non-empty, 8-bit, ASCII-only strings.
+/// Asserts that it receives exactly one argument and that it is a string.
+///
+/// Returns an array of lines, each keeping its trailing `\n`. Returns `undefined`
+/// when the input is empty, 16-bit, or contains non-ASCII bytes, so the caller can
+/// fall back to the regex implementation this mirrors:
+///
+/// ```js
+/// // util.js
+/// const extractedNewLineRe = new RegExp("(?<=\\n)");
+/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
+/// ```
+pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
+    bun.assert(callframe.argumentsCount() == 1);
+    const value = callframe.argument(0);
+    bun.assert(value.isString());
+
+    const str = try value.toBunString2(globalThis);
+    // Release the string obtained above on every exit path, including early returns.
+    defer str.deref();
+
+    // The scan below works byte-by-byte, which is only meaningful for 8-bit strings.
+    // NOTE(review): for 16-bit strings `byteSlice()` is assumed to expose raw UTF-16 code units.
+    if (!str.is8Bit()) return JSC.JSValue.jsUndefined();
+
+    const bytes = str.byteSlice();
+    // Empty input: the regex split yields `[""]`, not `[]`, so defer to the slow path.
+    // Non-ASCII input: the slices are re-wrapped with `fromBytes`, which is not told the
+    // bytes are Latin-1 (NOTE(review): confirm), so only pure ASCII is known to round-trip.
+    if (bytes.len == 0 or !bun.strings.isAllASCII(bytes)) return JSC.JSValue.jsUndefined();
+
+    var fallback = std.heap.stackFallback(1024, bun.default_allocator);
+    const allocator = fallback.get();
+
+    var lines: std.ArrayListUnmanaged(bun.String) = .{};
+    defer {
+        for (lines.items) |out| {
+            out.deref();
+        }
+        lines.deinit(allocator);
+    }
+
+    var start: usize = 0;
+    while (std.mem.indexOfScalarPos(u8, bytes, start, '\n')) |delim_start| {
+        const end = delim_start + 1;
+        try lines.append(allocator, bun.String.fromBytes(bytes[start..end])); // include the newline
+        start = end;
+    }
+
+    // Trailing text after the last newline; nothing is added when the input ends in `\n`.
+    if (start < bytes.len) {
+        try lines.append(allocator, bun.String.fromBytes(bytes[start..]));
+    }
+
+    return bun.String.toJSArray(globalThis, lines.items);
+}
diff --git a/src/js/internal/util/inspect.js b/src/js/internal/util/inspect.js
index 5cdb40af5b..f98354aae0 100644
--- a/src/js/internal/util/inspect.js
+++ b/src/js/internal/util/inspect.js
@@ -141,6 +141,21 @@ const kRejected = Symbol("kRejected"); // state ID 2
 const ALL_PROPERTIES = 0;
 const ONLY_ENUMERABLE = 2;

+/**
+ * Fast path for {@link extractedSplitNewLines} for non-empty, ASCII-only strings.
+ * @returns `value` split on newlines (newline included at end), or `undefined`
+ * if `value` is empty or contains non-ASCII characters.
+ *
+ * Passing this a non-string will cause a panic.
+ *
+ * @type {(value: string) => string[] | undefined}
+ */
+const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
+  "node_util_binding.zig",
+  "extractedSplitNewLinesFastPathStringsOnly",
+  1,
+);
+
 const isAsyncFunction = v =>
   typeof v === "function" && StringPrototypeStartsWith(FunctionPrototypeToString(v), "async");
 const isGeneratorFunction = v =>
@@ -397,7 +412,7 @@ let strEscapeSequencesRegExp,
   strEscapeSequencesReplacer,
   strEscapeSequencesRegExpSingle,
   strEscapeSequencesReplacerSingle,
-  extractedSplitNewLines;
+  extractedSplitNewLinesSlow;
 try {
   // Change from regex literals to RegExp constructors to avoid unrecoverable
   // syntax error at load time.
@@ -416,7 +431,7 @@ try {
     "g",
   );
   const extractedNewLineRe = new RegExp("(?<=\\n)");
-  extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
+  extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
   // CI doesn't run in an elderly runtime
 } catch {
   // These are from a previous version of node,
@@ -426,7 +441,7 @@ try {
   strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
   strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
   strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
-  extractedSplitNewLines = value => {
+  extractedSplitNewLinesSlow = value => {
     const lines = RegExpPrototypeSymbolSplit(/\n/, value);
     const last = ArrayPrototypePop(lines);
     const nlLines = ArrayPrototypeMap(lines, line => line + "\n");
@@ -437,5 +452,12 @@ try {
   };
 }

+// Splits `value` after each "\n", keeping the newline. Non-empty strings try the native fast
+// path, which returns `undefined` for input it cannot handle; "" always takes the regex path
+// so the result stays [""], as RegExp.prototype[Symbol.split] produces.
+const extractedSplitNewLines = value =>
+  (typeof value === "string" && value.length > 0 && extractedSplitNewLinesFastPathStringsOnly(value)) ||
+  extractedSplitNewLinesSlow(value);
+
 const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
 const numberRegExp = /^(0|[1-9][0-9]*)$/;