perf(node:util): fast path for extractedSplitNewLines

This commit is contained in:
Don Isaac
2024-12-18 00:40:59 -08:00
parent b5b51004e8
commit 125a2cf920
2 changed files with 70 additions and 3 deletions

View File

@@ -105,3 +105,48 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
return fmtstring.transferToJS(globalThis);
}
/// Native fast path for `extractedSplitNewLines`, for 8-bit, all-ASCII strings.
///
/// Splits the single string argument after every `'\n'`, keeping the newline
/// at the end of each piece, and returns the pieces as a JS array.
///
/// Returns `undefined` when the string is not 8-bit or contains non-ASCII
/// bytes, so that the caller can fall back to the regex-based implementation.
///
/// Asserts that exactly one argument was passed and that it is a string;
/// panics otherwise.
///
/// ```js
/// // util.js
/// const extractedNewLineRe = new RegExp("(?<=\\n)");
/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
/// ```
pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
    bun.assert(callframe.argumentsCount() == 1);
    const value = callframe.argument(0);
    bun.assert(value.isString());

    const str = try value.toBunString2(globalThis);
    // Release the reference taken by `toBunString2`. Declared first so it runs
    // last, after every slice of `bytes` below is no longer in use.
    defer str.deref();

    const bytes = str.byteSlice();

    // The scan below works on single bytes and hands raw byte ranges to
    // `bun.String.fromBytes`. That is only sound when one byte is exactly one
    // ASCII character: a 16-bit string would be cut in the middle of a code
    // unit, and bytes >= 0x80 would be passed on without any transcoding.
    // Anything else is left to the slow path.
    if (!str.is8Bit() or !bun.strings.isAllASCII(bytes)) {
        return JSC.JSValue.jsUndefined();
    }

    var fallback = std.heap.stackFallback(1024, bun.default_allocator);
    const allocator = fallback.get();

    var lines: std.ArrayListUnmanaged(bun.String) = .{};
    defer {
        for (lines.items) |line| {
            line.deref();
        }
        lines.deinit(allocator);
    }

    var start: usize = 0;
    while (std.mem.indexOfScalarPos(u8, bytes, start, '\n')) |newline_index| {
        const end = newline_index + 1; // keep the newline with its line
        try lines.append(allocator, bun.String.fromBytes(bytes[start..end]));
        start = end;
    }

    // Trailing text without a terminating newline. The regex-based split
    // yields `[""]` for an empty input, so an empty string must still produce
    // one (empty) piece instead of an empty array.
    if (start < bytes.len or bytes.len == 0) {
        try lines.append(allocator, bun.String.fromBytes(bytes[start..]));
    }

    return bun.String.toJSArray(globalThis, lines.items);
}

View File

@@ -141,6 +141,21 @@ const kRejected = Symbol("kRejected"); // state ID 2
const ALL_PROPERTIES = 0;
const ONLY_ENUMERABLE = 2;
/**
 * Native fast path for {@link extractedSplitNewLines}, implemented by
 * `extractedSplitNewLinesFastPathStringsOnly` in `node_util_binding.zig`.
 *
 * Splits `value` after every `"\n"`, keeping the newline at the end of each
 * piece. Returns `undefined` when the native side declines to handle the
 * string (e.g. it contains non-ASCII characters); callers must then fall back
 * to the regex-based implementation.
 *
 * Must be called with exactly one argument, and that argument must be a
 * string: the native implementation asserts both and panics otherwise.
 *
 * @type {(value: string) => string[] | undefined}
 */
const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
"node_util_binding.zig",
"extractedSplitNewLinesFastPathStringsOnly",
1,
);
// True when `v` is a function whose source text starts with "async".
const isAsyncFunction = v => {
  if (typeof v !== "function") {
    return false;
  }
  const sourceText = FunctionPrototypeToString(v);
  return StringPrototypeStartsWith(sourceText, "async");
};
const isGeneratorFunction = v =>
@@ -397,7 +412,7 @@ let strEscapeSequencesRegExp,
strEscapeSequencesReplacer,
strEscapeSequencesRegExpSingle,
strEscapeSequencesReplacerSingle,
extractedSplitNewLines;
extractedSplitNewLinesSlow;
try {
// Change from regex literals to RegExp constructors to avoid unrecoverable
// syntax error at load time.
@@ -416,7 +431,7 @@ try {
"g",
);
const extractedNewLineRe = new RegExp("(?<=\\n)");
extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
// CI doesn't run in an elderly runtime
} catch {
// These are from a previous version of node,
@@ -426,7 +441,7 @@ try {
strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
extractedSplitNewLines = value => {
extractedSplitNewLinesSlow = value => {
const lines = RegExpPrototypeSymbolSplit(/\n/, value);
const last = ArrayPrototypePop(lines);
const nlLines = ArrayPrototypeMap(lines, line => line + "\n");
@@ -437,6 +452,13 @@ try {
};
}
/**
 * Splits `value` after each newline, keeping the newline with its line.
 *
 * Strings are first offered to the native fast path; when that returns
 * `undefined` (or `value` is not a string at all) the regex-based
 * `extractedSplitNewLinesSlow` is used instead.
 */
const extractedSplitNewLines = value => {
  // The native binding panics on non-strings, so only strings may reach it.
  const fastResult = typeof value === "string" ? extractedSplitNewLinesFastPathStringsOnly(value) : undefined;
  return fastResult || extractedSplitNewLinesSlow(value);
};
const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
const numberRegExp = /^(0|[1-9][0-9]*)$/;