Compare commits

..

1 Commits

Author SHA1 Message Date
Claude Bot
f3f62a52b4 fix(fs): throw early for strings exceeding WebKit's 2GB limit
Fixes #2570

Previously, `readFileSync` with string encodings (utf8, ascii, etc.) only
checked against `synthetic_allocation_limit` (~4GB), but WebKit's
`WTF::String::MaxLength` is only ~2GB (`std::numeric_limits<int32_t>::max()`).

This mismatch caused large files (>2GB) to be silently truncated when
converted to strings, resulting in confusing "JSON Parse error: Unexpected EOF"
errors instead of a clear memory error.

The fix:
- Changes `string_allocation_limit` default from `maxInt(u32)` to `maxInt(i32)`
  to match WebKit's String::MaxLength
- Updates `shouldThrowOutOfMemoryEarlyForJavaScript` to use the appropriate
  limit based on encoding type (buffer encoding uses higher typed array limit,
  string encodings use the WebKit string limit)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 07:32:23 +00:00
6 changed files with 52 additions and 204 deletions

View File

@@ -10,7 +10,10 @@ pub export var isBunTest: bool = false;
// TODO: evaluate if this has any measurable performance impact.
pub var synthetic_allocation_limit: usize = std.math.maxInt(u32);
pub var string_allocation_limit: usize = std.math.maxInt(u32);
/// Max WTFStringImpl/JSString length, matches `WTF::String::MaxLength` / `JSString::MaxLength`.
/// This ensures readFileSync and other APIs that return strings will throw early
/// instead of allowing WebKit to silently truncate strings larger than ~2GB.
pub var string_allocation_limit: usize = std.math.maxInt(i32);
comptime {
_ = Bun__remapStackFramePositions;

View File

@@ -4870,9 +4870,10 @@ pub const NodeFS = struct {
}
fn shouldThrowOutOfMemoryEarlyForJavaScript(encoding: Encoding, size: usize, syscall: Syscall.Tag) ?Syscall.Error {
// Strings & typed arrays max out at 4.7 GB.
// But, it's **string length**
// So you can load an 8 GB hex string, for example, it should be fine.
// String length limits depend on the encoding:
// - For buffer encoding, we're limited by typed array size (~4.7 GB)
// - For string encodings, we're limited by WebKit's String::MaxLength (~2.15 GB)
// The adjusted_size accounts for encoding expansion/contraction.
const adjusted_size = switch (encoding) {
.utf16le, .ucs2, .utf8 => size / 4 -| 1,
.hex => size / 2 -| 1,
@@ -4880,9 +4881,15 @@ pub const NodeFS = struct {
.ascii, .latin1, .buffer => size,
};
if (
// Typed arrays in JavaScript are limited to 4.7 GB.
adjusted_size > jsc.VirtualMachine.synthetic_allocation_limit or
// Use the appropriate limit based on encoding type:
// - buffer returns a typed array (synthetic_allocation_limit ~4.7GB)
// - all other encodings return a string (string_allocation_limit ~2.15GB)
const allocation_limit = switch (encoding) {
.buffer => jsc.VirtualMachine.synthetic_allocation_limit,
else => jsc.VirtualMachine.string_allocation_limit,
};
if (adjusted_size > allocation_limit or
// If they do not have enough memory to open the file and they're on Linux, let's throw an error instead of dealing with the OOM killer.
(Environment.isLinux and size >= bun.getTotalMemorySize()))
{

View File

@@ -256,74 +256,4 @@ function unref(this: NativeReadable) {
}
}
// constructNativeSocket creates a net.Socket wrapping a native readable.
// This is used for child_process stdout/stderr to match Node.js behavior
// where these streams are Socket instances, not plain Readable streams.
// See: https://github.com/oven-sh/bun/issues/26505
function constructNativeSocket(readableStream: ReadableStream, options): NativeReadable {
  $assert(typeof readableStream === "object" && readableStream instanceof ReadableStream, "Invalid readable stream");
  const bunNativePtr = (readableStream as any).$bunNativePtr;
  $assert(typeof bunNativePtr === "object", "Invalid native ptr");
  // Create a Socket with readable=true, writable=false for stdout/stderr
  // Spread options first, then enforce readable/writable to prevent overrides
  const { Socket } = require("node:net");
  const stream = new Socket({
    ...options,
    readable: true,
    writable: false,
  });
  // Override _read with our native implementation
  stream._read = read;
  // Create a custom _destroy that cleans up native resources and then emits close
  const originalSocketDestroy = Socket.prototype._destroy;
  stream._destroy = function socketDestroy(error: any, cb: () => void) {
    const ptr = this.$bunNativePtr;
    if (ptr) {
      // Tear down the underlying native readable before the Socket closes.
      ptr.cancel(error);
    }
    // Call the original Socket._destroy which will emit "close"
    // Since we don't have a _handle, it will call cb and emit close via emitCloseNT
    return originalSocketDestroy.$call(this, error, cb);
  };
  // End the writable side immediately since this is a read-only socket
  stream._writableState.ended = true;
  stream._writableState.finished = true;
  if (!!$debug) {
    // Debug builds tag each stream with a unique id for log correlation.
    stream.debugId = ++debugId;
  }
  stream.$bunNativePtr = bunNativePtr;
  stream[kRefCount] = 0;
  stream[kConstructed] = false;
  stream[kPendingRead] = false;
  // NOTE(review): kHasResized presumably marks whether chunk-size auto-tuning
  // already ran — confirm against dynamicallyAdjustChunkSize()'s semantics.
  stream[kHasResized] = !dynamicallyAdjustChunkSize();
  stream[kCloseState] = [false];
  if (typeof options.highWaterMark === "number") {
    stream[kHighWaterMark] = options.highWaterMark;
  } else {
    // Default high-water mark: 256 KiB.
    stream[kHighWaterMark] = 256 * 1024;
  }
  // Override ref/unref to use native implementation
  stream.ref = ref;
  stream.unref = unref;
  // https://github.com/oven-sh/bun/pull/12801
  // https://github.com/oven-sh/bun/issues/9555
  // There may be a ReadableStream.Strong handle to the ReadableStream.
  // We can't update those handles to point to the NativeReadable from JS
  // So we instead mark it as no longer usable, and create a new NativeReadable
  transferToNativeReadable(readableStream);
  $debug(`[${stream.debugId}] constructed socket!`);
  return stream;
}
export default { constructNativeReadable, constructNativeSocket };
export default { constructNativeReadable };

View File

@@ -1181,27 +1181,22 @@ class ChildProcess extends EventEmitter {
const value = handle?.[fdToStdioName(i as 1 | 2)!];
// This can happen if the process was already killed.
if (!value) {
// Return a destroyed Socket to match Node.js behavior
if (!NetModule) NetModule = require("node:net");
const stream = new NetModule.Socket({ readable: true, writable: false });
const Readable = require("internal/streams/readable");
const stream = new Readable({ read() {} });
// Mark as destroyed to indicate it's not usable
stream.destroy();
return stream;
}
// Use constructNativeSocket to return a Socket instance for stdout/stderr
// This matches Node.js behavior where child process stdio streams are Sockets
// See: https://github.com/oven-sh/bun/issues/26505
const pipe = require("internal/streams/native-readable").constructNativeSocket(value, { encoding });
const pipe = require("internal/streams/native-readable").constructNativeReadable(value, { encoding });
this.#closesNeeded++;
pipe.once("close", () => this.#maybeClose());
if (autoResume) pipe.resume();
return pipe;
}
case "destroyed": {
// Return a destroyed Socket to match Node.js behavior
if (!NetModule) NetModule = require("node:net");
const stream = new NetModule.Socket({ readable: true, writable: false });
const Readable = require("internal/streams/readable");
const stream = new Readable({ read() {} });
// Mark as destroyed to indicate it's not usable
stream.destroy();
return stream;

View File

@@ -0,0 +1,29 @@
import { constants, kMaxLength, kStringMaxLength } from "buffer";
import { expect, test } from "bun:test";
// Issue #2570: JSON.parse silently truncates strings larger than ~2GB
// Root cause: Mismatch between Bun's string allocation limit and WebKit's String::MaxLength
//
// The fix ensures that readFileSync with string encodings (utf8, ascii, etc.)
// checks against WebKit's String::MaxLength (~2GB) instead of the higher
// typed array limit (~4GB). This prevents silent string truncation that
// caused confusing "JSON Parse error: Unexpected EOF" errors.
// Sanity-check the exported string length limit against WebKit's hard cap.
test("kStringMaxLength matches WebKit's String::MaxLength (2^31 - 1)", () => {
  // WebKit's String::MaxLength is std::numeric_limits<int32_t>::max()
  const maxInt32 = Math.pow(2, 31) - 1;
  expect(kStringMaxLength).toBe(maxInt32);
  // buffer.constants.MAX_STRING_LENGTH must agree with kStringMaxLength.
  expect(constants.MAX_STRING_LENGTH).toBe(maxInt32);
});
// The buffer/typed-array ceiling must stay strictly above the string ceiling,
// otherwise the per-encoding limit selection in readFileSync is pointless.
test("buffer encoding has higher limit than string encoding", () => {
  // Buffer encoding uses synthetic_allocation_limit (~4.7GB)
  // String encoding uses string_allocation_limit (~2.15GB)
  // kMaxLength is for buffers/typed arrays
  // kStringMaxLength is for strings
  expect(kMaxLength).toBeGreaterThan(kStringMaxLength);
  // kStringMaxLength should be 2^31 - 1 (maxInt32)
  expect(kStringMaxLength).toBe(2147483647);
});

View File

@@ -1,116 +0,0 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe } from "harness";
import { spawn } from "node:child_process";
import { Socket } from "node:net";
// https://github.com/oven-sh/bun/issues/26505
// Child process piped stdout/stderr should be Socket instances, not plain Readable streams
/**
 * Buffers every "data" chunk emitted by `stream` and resolves with the
 * concatenated contents decoded as a string once "end" fires.
 * Rejects with the stream's error if "error" is emitted first.
 */
function collectStreamData(stream: NodeJS.ReadableStream): Promise<string> {
  return new Promise((resolve, reject) => {
    const pieces: Buffer[] = [];
    const onData = (chunk: string | Buffer) => {
      pieces.push(Buffer.from(chunk));
    };
    const onEnd = () => {
      resolve(Buffer.concat(pieces).toString());
    };
    stream.on("data", onData);
    stream.on("end", onEnd);
    stream.on("error", reject);
  });
}
/**
 * Resolves with the child process's exit code once its "close" event fires
 * (null when the process was terminated by a signal rather than exiting).
 */
function waitForClose(cp: ReturnType<typeof spawn>): Promise<number | null> {
  return new Promise(resolve => {
    const handleClose = (code: number | null) => {
      resolve(code);
    };
    cp.on("close", handleClose);
  });
}
// Piped stdout must be a real net.Socket (issue #26505) and still deliver data.
test("child process stdout is a Socket instance", async () => {
  const cp = spawn(bunExe(), ["-e", "console.log('hello')"], {
    stdio: "pipe",
    env: bunEnv,
  });
  expect(cp.stdout).toBeInstanceOf(Socket);
  expect(cp.stdout!.constructor.name).toBe("Socket");
  expect(typeof cp.stdout!.ref).toBe("function");
  expect(typeof cp.stdout!.unref).toBe("function");
  // Drain stdout and wait for process exit concurrently to avoid a pipe stall.
  const [stdout, exitCode] = await Promise.all([collectStreamData(cp.stdout!), waitForClose(cp)]);
  expect(stdout.trim()).toBe("hello");
  expect(exitCode).toBe(0);
});
// Same contract for stderr: Socket instance with working ref/unref.
test("child process stderr is a Socket instance", async () => {
  const cp = spawn(bunExe(), ["-e", "console.error('error message')"], {
    stdio: "pipe",
    env: bunEnv,
  });
  expect(cp.stderr).toBeInstanceOf(Socket);
  expect(cp.stderr!.constructor.name).toBe("Socket");
  expect(typeof cp.stderr!.ref).toBe("function");
  expect(typeof cp.stderr!.unref).toBe("function");
  const [stderr, exitCode] = await Promise.all([collectStreamData(cp.stderr!), waitForClose(cp)]);
  expect(stderr.trim()).toBe("error message");
  expect(exitCode).toBe(0);
});
// stdin keeps its plain Writable identity; round-trip data through the child.
test("child process stdin is not a Socket (it's a Writable)", async () => {
  const cp = spawn(bunExe(), ["-e", "process.stdin.pipe(process.stdout)"], {
    stdio: "pipe",
    env: bunEnv,
  });
  // stdin is a Writable, not a Socket
  expect(cp.stdin).not.toBeInstanceOf(Socket);
  expect(typeof cp.stdin!.write).toBe("function");
  cp.stdin!.write("hello from stdin");
  cp.stdin!.end();
  const [stdout, exitCode] = await Promise.all([collectStreamData(cp.stdout!), waitForClose(cp)]);
  expect(stdout).toBe("hello from stdin");
  expect(exitCode).toBe(0);
});
// ref/unref are overridden with native implementations; they must not throw.
test("socket ref/unref methods work correctly", async () => {
  const cp = spawn(bunExe(), ["-e", "console.log('done')"], {
    stdio: "pipe",
    env: bunEnv,
  });
  // Should not throw when calling ref/unref
  expect(() => cp.stdout!.ref()).not.toThrow();
  expect(() => cp.stdout!.unref()).not.toThrow();
  expect(() => cp.stderr!.ref()).not.toThrow();
  expect(() => cp.stderr!.unref()).not.toThrow();
  const [stdout, exitCode] = await Promise.all([collectStreamData(cp.stdout!), waitForClose(cp)]);
  expect(stdout.trim()).toBe("done");
  expect(exitCode).toBe(0);
});
// Stream identities and ref/unref must hold even when the child exits non-zero.
test("socket streams work correctly when process exits with non-zero code", async () => {
  const cp = spawn(bunExe(), ["-e", "console.error('error output'); process.exit(1)"], {
    stdio: "pipe",
    env: bunEnv,
  });
  // Verify stream types are correct even for failing processes
  expect(cp.stdout).toBeInstanceOf(Socket);
  expect(cp.stderr).toBeInstanceOf(Socket);
  expect(cp.stdin).not.toBeInstanceOf(Socket);
  // ref/unref should not throw on failing process streams
  expect(() => cp.stdout!.ref()).not.toThrow();
  expect(() => cp.stdout!.unref()).not.toThrow();
  expect(() => cp.stderr!.ref()).not.toThrow();
  expect(() => cp.stderr!.unref()).not.toThrow();
  const [stderr, exitCode] = await Promise.all([collectStreamData(cp.stderr!), waitForClose(cp)]);
  expect(stderr.trim()).toBe("error output");
  expect(exitCode).toBe(1);
});