Compare commits

...

11 Commits

Author SHA1 Message Date
Don Isaac
8d55733e8b fix filename 2024-12-19 14:25:20 -08:00
Don Isaac
d9142f903a test: repro for writeFileSync in child process bug 2024-12-19 13:54:10 -08:00
Don Isaac
0ed0ce9269 Merge branch 'main' into don/util/perf/extracted-split-fast-path 2024-12-19 13:31:10 -08:00
Don Isaac
1a063df8ee Merge branch 'main' into don/util/perf/extracted-split-fast-path 2024-12-19 10:39:24 -08:00
Don Isaac
ea125903e4 free line list with correct allocator 2024-12-18 19:33:15 -08:00
Don Isaac
19a68ab362 wip 2024-12-18 19:05:35 -08:00
DonIsaac
c307c7f8ae bun run zig-format 2024-12-19 00:57:37 +00:00
Don Isaac
45eaa5c81a fix: handle utf16-encoded strings 2024-12-18 16:55:28 -08:00
Don Isaac
6d1d3d689b Merge branch 'main' of github.com:oven-sh/bun into don/util/perf/extracted-split-fast-path 2024-12-18 16:29:54 -08:00
DonIsaac
e9e3c0db56 bun run zig-format 2024-12-18 08:43:00 +00:00
Don Isaac
125a2cf920 perf(node:util): fast path for extractedSplitNewLines 2024-12-18 00:40:59 -08:00
4 changed files with 162 additions and 4 deletions

View File

@@ -1,5 +1,6 @@
const std = @import("std");
const bun = @import("root").bun;
const Allocator = std.mem.Allocator;
const Environment = bun.Environment;
const JSC = bun.JSC;
const string = bun.string;
@@ -105,3 +106,92 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
return fmtstring.transferToJS(globalThis);
}
/// Native fast path for `extractedSplitNewLines`; only accepts a single string argument.
///
/// Asserts (via `bun.assert`) that exactly one argument was passed and that it
/// is a string. UTF-16 and Latin-1 backed strings are always split here. A
/// UTF-8 backed string is split only when every byte is ASCII; otherwise
/// `undefined` is returned and nothing is split.
///
/// The JavaScript implementation this mirrors:
/// ```js
/// // util.js
/// const extractedNewLineRe = new RegExp("(?<=\\n)");
/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
/// ```
pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
    bun.assert(callframe.argumentsCount() == 1);
    const arg = callframe.argument(0);
    bun.assert(arg.isString());

    const input = try arg.toBunString2(globalThis);
    // Release our reference once the split (which runs before this defer) is done.
    defer input.deref();

    switch (input.encoding()) {
        // `inline` makes the captured tag comptime-known so it can be passed
        // as `split`'s comptime `encoding` parameter.
        inline .utf16, .latin1 => |encoding| return split(encoding, globalThis, bun.default_allocator, &input),
        .utf8 => {
            // Non-ASCII UTF-8 is not handled by the fast path.
            if (!bun.strings.isAllASCII(input.byteSlice())) {
                return JSC.JSValue.jsUndefined();
            }
            return split(.utf8, globalThis, bun.default_allocator, &input);
        },
    }
}
/// Splits `str` into lines using `SplitNewlineIterator` and returns them as a
/// JS array of strings.
///
/// - `encoding`: comptime encoding of `str`. Selects the code unit type
///   (`u8` for UTF-8/Latin-1, `u16` for UTF-16) and the `bun.String`
///   constructor used for each line.
/// - `allocator`: backs the temporary list of lines once it outgrows the
///   1024-byte stack buffer.
/// - `str`: the string to split; it is only read.
///
/// Every intermediate `bun.String` is dereferenced before returning, on both
/// the success and the error path.
fn split(
    comptime encoding: bun.strings.EncodingNonAscii,
    globalThis: *JSC.JSGlobalObject,
    allocator: Allocator,
    str: *const bun.String,
) bun.JSError!JSC.JSValue {
    // Try a small stack buffer first; fall back to `allocator` if it is exhausted.
    var fallback = std.heap.stackFallback(1024, allocator);
    const alloc = fallback.get();

    const Char = switch (encoding) {
        .utf8, .latin1 => u8,
        .utf16 => u16,
    };

    var lines: std.ArrayListUnmanaged(bun.String) = .{};
    defer {
        for (lines.items) |out| {
            out.deref();
        }
        // Must be freed with the same allocator that `append` used below.
        lines.deinit(alloc);
    }

    // `encoding` is comptime-known, so only the matching branch is analyzed.
    const buffer: []const Char = if (encoding == .utf16)
        str.utf16()
    else
        str.byteSlice();

    var it: SplitNewlineIterator(Char) = .{ .buffer = buffer, .index = 0 };
    while (it.next()) |line| {
        const encoded_line = switch (encoding) {
            inline .utf8 => bun.String.fromUTF8(line),
            inline .latin1 => bun.String.createLatin1(line),
            inline .utf16 => bun.String.fromUTF16(line),
        };
        // If the append fails the line is not in `lines` yet, so the deferred
        // cleanup above would miss it.
        errdefer encoded_line.deref();
        try lines.append(alloc, encoded_line);
    }

    return bun.String.toJSArray(globalThis, lines.items);
}
/// Returns an iterator type over a `[]const T` buffer that yields consecutive
/// lines, each one including its terminating `'\n'` (if it has one).
///
/// The results match splitting on the JS regular expression `/(?<=\n)/`:
/// - an empty buffer yields a single empty slice,
/// - a buffer that ends with `'\n'` does NOT yield a trailing empty slice.
///
/// Initialize with `.{ .buffer = input, .index = 0 }`. The iterator borrows
/// `buffer`; returned slices point into it.
pub fn SplitNewlineIterator(comptime T: type) type {
    return struct {
        /// The input being split.
        buffer: []const T,
        /// Offset of the start of the next line, or `null` once iteration is complete.
        index: ?usize,

        const Self = @This();

        /// Returns a slice of the next line, or null if splitting is complete.
        pub fn next(self: *Self) ?[]const T {
            const start = self.index orelse return null;

            if (std.mem.indexOfScalarPos(T, self.buffer, start, '\n')) |delim_start| {
                const end = delim_start + 1;
                // A newline that is the very last element terminates the final
                // line; there is no empty line after it to report.
                self.index = if (end < self.buffer.len) end else null;
                return self.buffer[start..end];
            }

            // No more newlines: the remainder (possibly empty, when the whole
            // buffer is empty) is the last line.
            self.index = null;
            return self.buffer[start..];
        }
    };
}

View File

@@ -141,6 +141,21 @@ const kRejected = Symbol("kRejected"); // state ID 2
const ALL_PROPERTIES = 0;
const ONLY_ENUMERABLE = 2;
/**
 * Native fast path for {@link extractedSplitNewLines}, bound to the Zig
 * function `extractedSplitNewLinesFastPathStringsOnly` in
 * `node_util_binding.zig` (declared with one argument).
 *
 * @returns `value` split after each newline (the newline stays at the end of
 * its line), or `undefined` when the native side declines the input, which it
 * does for UTF-8 backed strings containing non-ASCII bytes.
 *
 * Only pass strings: the native function asserts that its argument is a
 * string, so passing anything else will cause a panic.
 *
 * @type {(value: string) => string[] | undefined}
 */
const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
  "node_util_binding.zig",
  "extractedSplitNewLinesFastPathStringsOnly",
  1,
);
// True when `v` is a function whose source text, as produced by
// FunctionPrototypeToString, starts with "async".
const isAsyncFunction = v =>
  typeof v === "function" && StringPrototypeStartsWith(FunctionPrototypeToString(v), "async");
const isGeneratorFunction = v =>
@@ -397,7 +412,7 @@ let strEscapeSequencesRegExp,
strEscapeSequencesReplacer,
strEscapeSequencesRegExpSingle,
strEscapeSequencesReplacerSingle,
extractedSplitNewLines;
extractedSplitNewLinesSlow;
try {
// Change from regex literals to RegExp constructors to avoid unrecoverable
// syntax error at load time.
@@ -416,7 +431,7 @@ try {
"g",
);
const extractedNewLineRe = new RegExp("(?<=\\n)");
extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
// CI doesn't run in an elderly runtime
} catch {
// These are from a previous version of node,
@@ -426,7 +441,7 @@ try {
strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
extractedSplitNewLines = value => {
extractedSplitNewLinesSlow = value => {
const lines = RegExpPrototypeSymbolSplit(/\n/, value);
const last = ArrayPrototypePop(lines);
const nlLines = ArrayPrototypeMap(lines, line => line + "\n");
@@ -437,6 +452,13 @@ try {
};
}
/**
 * Splits `value` into lines, keeping each newline at the end of its line.
 *
 * Strings are first offered to the native fast path; when that yields a falsy
 * result (it returns `undefined` for inputs it does not handle), or when
 * `value` is not a string, the regex-based slow path is used instead.
 */
const extractedSplitNewLines = value => {
  // The native function must only ever be called with a string.
  const fastResult = typeof value === "string" ? extractedSplitNewLinesFastPathStringsOnly(value) : undefined;
  return fastResult || extractedSplitNewLinesSlow(value);
};
// Matches strings made only of ASCII letters, digits and underscores that do
// not start with a digit.
const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
// Matches non-negative decimal integers written without leading zeros ("0", "42").
const numberRegExp = /^(0|[1-9][0-9]*)$/;

View File

@@ -1,11 +1,13 @@
import { semver, write } from "bun";
import { afterAll, beforeEach, describe, expect, it } from "bun:test";
import { afterAll, beforeAll, beforeEach, describe, expect, it } from "bun:test";
import fs from "fs";
import { bunEnv, bunExe, isWindows, nodeExe, runBunInstall, shellExe, tmpdirSync } from "harness";
import { ChildProcess, exec, execFile, execFileSync, execSync, spawn, spawnSync } from "node:child_process";
import { promisify } from "node:util";
import path from "path";
const debug = process.env.DEBUG ? console.log : () => {};
const fixturePath = (filename: string): string => path.join(import.meta.dir, "fixtures", filename);
const originalProcessEnv = process.env;
beforeEach(() => {
@@ -464,3 +466,36 @@ it("spawnSync(does-not-exist)", () => {
expect(x.stdout).toEqual(null);
expect(x.stderr).toEqual(null);
});
// Regression test: a child spawned with `cwd` must see that directory as its
// working directory.
describe("spawnSync()", () => {
  let tmpdir: string;

  beforeAll(() => {
    tmpdir = tmpdirSync("bun.test.spawnSync-cwd-");
  });

  afterAll(() => {
    try {
      // `fs.rmdirSync(dir, { recursive: true })` is deprecated; `fs.rmSync` is
      // the supported way to remove a directory tree.
      fs.rmSync(tmpdir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  // Plain `it`, not `it.only`: `.only` would skip every other test in this file.
  it("can set the cwd", () => {
    const fixture = fixturePath("child-process-fs-writeFileSync.js");
    const before = fs.readdirSync(tmpdir, "utf8");

    const { stdout: stdoutBuf } = spawnSync(bunExe(), [fixture], {
      cwd: tmpdir,
      env: bunEnv,
    });

    // this fixture echos its `process.cwd()`. it should be in the temp dir,
    // not whatever is in process.env.PWD
    const stdout = stdoutBuf.toString("utf8");
    expect(stdout).toMatch(tmpdir);

    // fs.writeFileSync should respect the cwd. if it doesn't, the fixture file
    // is overwritten.
    const after = fs.readdirSync(tmpdir, "utf8");
    // `toEqual`, not `toBe`: the two listings are distinct array objects, so an
    // identity comparison could never pass.
    expect(after).toEqual(before);
  });
});

View File

@@ -0,0 +1,11 @@
/**
* @note cwd set to temp dir by spawning test
* @see fs-writeFile-child-process.test.ts
*/
const { writeFileSync } = require("node:fs");
const assert = require("assert");
const filename = __filename.split("/").pop();
assert(filename && filename.endsWith("writeFileSync.js"));
writeFileSync(__filename, "please don't override this source file");
process.stdout.write(process.cwd());