Compare commits

...

3 Commits

Author SHA1 Message Date
Claude Bot
b79277e13b perf(test): use Buffer.alloc instead of String.repeat in tests
Replace "x".repeat(...) with Buffer.alloc(count, "x").toString()
for better performance in debug JSC builds.
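A minimal sketch of the substitution (the 2000-byte size is illustrative; the real call sites are in the test file at the bottom of this diff):

```ts
// Before: builds the string in JavaScript via String.prototype.repeat
const slow = "x".repeat(2000);

// After: fills a native buffer once and decodes it in a single pass,
// which the commit found faster in debug JSC builds
const fast = Buffer.alloc(2000, "x").toString();
```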

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 08:17:16 +00:00
Claude Bot
29923ad770 fix: address review feedback
- Fix base64 output size calculation to use the proper ceiling formula:
  ((size + 2) / 3) * 4 instead of (size * 4) / 3 + 1 (a worked example
  follows this list)
- Clamp string_allocation_limit to WebKit's max (2^31-1) when setting
  from BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT env var to prevent
  reintroducing the crash
- Fix test description: "should allow reading files as buffer within
  the limit" (was incorrectly saying "larger than string limit")

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 08:03:47 +00:00
Claude Bot
d5558b6283 fix(fs): throw catchable error for large files in readFileSync instead of crashing
When reading files larger than ~2GB with `fs.readFileSync` using
string-producing encodings like "utf-8", Bun would crash with SIGTRAP
instead of throwing a catchable error like Node.js does.
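The intended behavior, sketched as a plain try/catch (the file path is hypothetical; the tests below pin the error code to ENOMEM):

```ts
import * as fs from "node:fs";

try {
  // A file whose decoded UTF-16 length would exceed WebKit's 2^31-1
  // character limit ("/tmp/huge.txt" is a hypothetical path)
  const text = fs.readFileSync("/tmp/huge.txt", "utf-8");
  console.log("read", text.length, "characters");
} catch (error) {
  // Previously: SIGTRAP crash. After this fix: a catchable error.
  console.log("caught:", (error as NodeJS.ErrnoException).code); // "ENOMEM"
}
```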

The root cause was three-fold:

1. The size estimation in `shouldThrowOutOfMemoryEarlyForJavaScript` was
   backwards: it divided sizes instead of multiplying when computing the
   output string length (see the sketch after this list):
   - UTF-8: divided by 4 (wrong - ASCII UTF-8 is 1:1, so max output = input)
   - hex: divided by 2 (wrong - each byte produces 2 hex chars)
   - base64: divided by 3 (wrong - 3 bytes produce 4 chars)

2. The default `string_allocation_limit` was set to `maxInt(u32)` (~4GB)
   but WebKit's `String::MaxLength` is `maxInt(i32)` (~2GB), causing
   strings that exceeded WebKit's limit to be attempted but fail at
   runtime with SIGTRAP.

3. The check wasn't being performed for small files that fit in the
   optimization buffer (< 256KB), so they also bypassed the validation.
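To make the direction of the error concrete, a sketch using hex encoding, the clearest case (sizes and variable names are illustrative):

```ts
// 100 MB of raw bytes (illustrative size)
const size = 100_000_000;

// Hex emits 2 characters per input byte, so the real output is:
const actualOutputChars = size * 2; // 200,000,000 characters

// The old estimate divided instead, predicting a quarter of the truth,
// so a file could pass the check and then crash during string creation:
const oldEstimate = size / 2; // 50,000,000 characters
console.log(actualOutputChars, oldEstimate);
```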

This fix:
- Corrects the output size calculations in the switch statement
- Sets `string_allocation_limit` default to `maxInt(i32)` to match WebKit
- Uses `string_allocation_limit` for string encodings and
  `synthetic_allocation_limit` for buffer encoding
- Adds the size check to the early return path for small files

Fixes #26323

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-21 07:45:59 +00:00
3 changed files with 234 additions and 13 deletions

View File

@@ -10,7 +10,8 @@ pub export var isBunTest: bool = false;
 // TODO: evaluate if this has any measurable performance impact.
 pub var synthetic_allocation_limit: usize = std.math.maxInt(u32);
-pub var string_allocation_limit: usize = std.math.maxInt(u32);
+/// WebKit's String::MaxLength is std::numeric_limits<int32_t>::max() = 2^31-1
+pub var string_allocation_limit: usize = std.math.maxInt(i32);
 comptime {
     _ = Bun__remapStackFramePositions;
@@ -516,7 +517,10 @@ pub fn loadExtraEnvAndSourceCodePrinter(this: *VirtualMachine) void {
 if (map.get("BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT")) |value| {
     if (std.fmt.parseInt(usize, value, 10)) |limit| {
         synthetic_allocation_limit = limit;
-        string_allocation_limit = limit;
+        // Clamp string_allocation_limit to WebKit's String::MaxLength (2^31-1)
+        // to prevent reintroducing the crash with large string allocations
+        const webkit_max: usize = std.math.maxInt(i32);
+        string_allocation_limit = @min(limit, webkit_max);
     } else |_| {
         Output.panic("BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT must be a positive integer", .{});
     }
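For readers who don't follow Zig, the same clamping logic as a TypeScript sketch (the names are illustrative; the hunk above is the authoritative code):

```ts
const WEBKIT_MAX_STRING_LENGTH = 2 ** 31 - 1; // WebKit's String::MaxLength

let syntheticAllocationLimit = 2 ** 32 - 1; // typed-array limit (u32 max)
let stringAllocationLimit = WEBKIT_MAX_STRING_LENGTH;

function applySyntheticMemoryLimit(limit: number) {
  // The typed-array limit takes the env var value verbatim...
  syntheticAllocationLimit = limit;
  // ...but the string limit must never exceed WebKit's maximum,
  // or the original SIGTRAP crash would come back.
  stringAllocationLimit = Math.min(limit, WEBKIT_MAX_STRING_LENGTH);
}

applySyntheticMemoryLimit(1000); // e.g. BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT=1000
```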

View File

@@ -4819,20 +4819,30 @@ pub const NodeFS = struct {
     }

     fn shouldThrowOutOfMemoryEarlyForJavaScript(encoding: Encoding, size: usize, syscall: Syscall.Tag) ?Syscall.Error {
-        // Strings & typed arrays max out at 4.7 GB.
-        // But, it's **string length**
-        // So you can load an 8 GB hex string, for example, it should be fine.
-        const adjusted_size = switch (encoding) {
-            .utf16le, .ucs2, .utf8 => size / 4 -| 1,
-            .hex => size / 2 -| 1,
-            .base64, .base64url => size / 3 -| 1,
-            .ascii, .latin1, .buffer => size,
+        // The encoding specifies how to ENCODE the raw bytes into a JavaScript string.
+        // We need to estimate the output string length in characters.
+        // WebKit's String::MaxLength is 2^31-1 characters (~2 billion).
+        const output_size, const limit = switch (encoding) {
+            // UTF-16le/UCS-2: 2 bytes per character
+            .utf16le, .ucs2 => .{ size / 2, jsc.VirtualMachine.string_allocation_limit },
+            // UTF-8: For valid UTF-8, output length <= input bytes (ASCII is 1:1,
+            // multi-byte sequences produce fewer UTF-16 code units than bytes)
+            .utf8 => .{ size, jsc.VirtualMachine.string_allocation_limit },
+            // Hex: Each byte produces 2 hex characters
+            .hex => .{ size *| 2, jsc.VirtualMachine.string_allocation_limit },
+            // Base64: ceil(size / 3) * 4 characters (every 3 bytes produce 4 chars, with padding)
+            .base64, .base64url => .{ ((size +| 2) / 3) *| 4, jsc.VirtualMachine.string_allocation_limit },
+            // ASCII/Latin1: 1 byte = 1 character
+            .ascii, .latin1 => .{ size, jsc.VirtualMachine.string_allocation_limit },
+            // Buffer: Returns a typed array, not a string - check against typed array limit
+            .buffer => .{ size, jsc.VirtualMachine.synthetic_allocation_limit },
         };

         if (
-            // Typed arrays in JavaScript are limited to 4.7 GB.
-            adjusted_size > jsc.VirtualMachine.synthetic_allocation_limit or
-            // If they do not have enough memory to open the file and they're on Linux, let's throw an error instead of dealing with the OOM killer.
+            // Check against the appropriate JavaScript limit (string or typed array)
+            output_size > limit or
+            // If they do not have enough memory to open the file and they're on Linux,
+            // let's throw an error instead of dealing with the OOM killer.
             (Environment.isLinux and size >= bun.getTotalMemorySize()))
         {
             return Syscall.Error.fromCode(.NOMEM, syscall);
@@ -5054,6 +5064,12 @@ pub const NodeFS = struct {
             };

             if (did_succeed) {
+                // Check size limit even for small files read in one go
+                if (args.limit_size_for_javascript) {
+                    if (shouldThrowOutOfMemoryEarlyForJavaScript(args.encoding, total, .read)) |err| {
+                        return .{ .err = err.withPathLike(args.path) };
+                    }
+                }
                 switch (args.encoding) {
                     .buffer => {
                         if (comptime flavor == .sync and string_type == .default) {

View File

@@ -0,0 +1,201 @@
import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";

describe("fs.readFileSync", () => {
  test("should throw catchable error for files exceeding string length limit with utf-8 encoding", async () => {
    // Create a file that exceeds the lowered string limit:
    // BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT is set to 1000 bytes
    // and the file is 2000 bytes, which should exceed the limit
    using dir = tempDir("readfile-string-limit", {
      "large.txt": Buffer.alloc(2000, "x").toString(),
    });

    await using proc = Bun.spawn({
      cmd: [
        bunExe(),
        "-e",
        `
          const fs = require("fs");
          try {
            const content = fs.readFileSync("large.txt", "utf-8");
            console.log("ERROR: Should have thrown but got content length:", content.length);
            process.exit(1);
          } catch (error) {
            console.log("Caught error:", error.code);
            process.exit(0);
          }
        `,
      ],
      env: {
        ...bunEnv,
        // BUN_GARBAGE_COLLECTOR_LEVEL must be set for BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT to be processed
        BUN_GARBAGE_COLLECTOR_LEVEL: "0",
        BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT: "1000",
      },
      cwd: String(dir),
      stderr: "pipe",
      stdout: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

    // Should exit cleanly with code 0 (caught the error)
    expect(stdout).toContain("Caught error: ENOMEM");
    expect(exitCode).toBe(0);
  });

  test("should throw catchable error for files exceeding string length limit with ascii encoding", async () => {
    using dir = tempDir("readfile-string-limit-ascii", {
      "large.txt": Buffer.alloc(2000, "x").toString(),
    });

    await using proc = Bun.spawn({
      cmd: [
        bunExe(),
        "-e",
        `
          const fs = require("fs");
          try {
            const content = fs.readFileSync("large.txt", "ascii");
            console.log("ERROR: Should have thrown but got content length:", content.length);
            process.exit(1);
          } catch (error) {
            console.log("Caught error:", error.code);
            process.exit(0);
          }
        `,
      ],
      env: {
        ...bunEnv,
        // BUN_GARBAGE_COLLECTOR_LEVEL must be set for BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT to be processed
        BUN_GARBAGE_COLLECTOR_LEVEL: "0",
        BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT: "1000",
      },
      cwd: String(dir),
      stderr: "pipe",
      stdout: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
    expect(stdout).toContain("Caught error: ENOMEM");
    expect(exitCode).toBe(0);
  });

  test("should throw catchable error for hex encoding when output exceeds limit", async () => {
    // Hex encoding doubles the size: 600 bytes -> 1200 chars > 1000 limit
    using dir = tempDir("readfile-string-limit-hex", {
      "data.bin": Buffer.alloc(600, "x").toString(),
    });

    await using proc = Bun.spawn({
      cmd: [
        bunExe(),
        "-e",
        `
          const fs = require("fs");
          try {
            const content = fs.readFileSync("data.bin", "hex");
            console.log("ERROR: Should have thrown but got content length:", content.length);
            process.exit(1);
          } catch (error) {
            console.log("Caught error:", error.code);
            process.exit(0);
          }
        `,
      ],
      env: {
        ...bunEnv,
        // BUN_GARBAGE_COLLECTOR_LEVEL must be set for BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT to be processed
        BUN_GARBAGE_COLLECTOR_LEVEL: "0",
        BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT: "1000",
      },
      cwd: String(dir),
      stderr: "pipe",
      stdout: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
    expect(stdout).toContain("Caught error: ENOMEM");
    expect(exitCode).toBe(0);
  });

  test("should allow reading files within the limit", async () => {
    using dir = tempDir("readfile-within-limit", {
      "small.txt": Buffer.alloc(500, "x").toString(),
    });

    await using proc = Bun.spawn({
      cmd: [
        bunExe(),
        "-e",
        `
          const fs = require("fs");
          try {
            const content = fs.readFileSync("small.txt", "utf-8");
            console.log("Success: read", content.length, "chars");
            process.exit(0);
          } catch (error) {
            console.log("ERROR: Unexpected error:", error.code);
            process.exit(1);
          }
        `,
      ],
      env: {
        ...bunEnv,
        // BUN_GARBAGE_COLLECTOR_LEVEL must be set for BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT to be processed
        BUN_GARBAGE_COLLECTOR_LEVEL: "0",
        BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT: "1000",
      },
      cwd: String(dir),
      stderr: "pipe",
      stdout: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
    expect(stdout).toContain("Success: read 500 chars");
    expect(exitCode).toBe(0);
  });

  test("should allow reading files as buffer within the limit", async () => {
    // When reading as buffer (no encoding), the synthetic_allocation_limit is used.
    // This test verifies that buffers within the limit can still be read.
    using dir = tempDir("readfile-buffer-limit", {
      "data.bin": Buffer.alloc(500, "x").toString(),
    });

    await using proc = Bun.spawn({
      cmd: [
        bunExe(),
        "-e",
        `
          const fs = require("fs");
          try {
            const content = fs.readFileSync("data.bin");
            console.log("Success: read", content.length, "bytes as buffer");
            process.exit(0);
          } catch (error) {
            console.log("ERROR: Unexpected error:", error.code);
            process.exit(1);
          }
        `,
      ],
      env: {
        ...bunEnv,
        // BUN_GARBAGE_COLLECTOR_LEVEL must be set for BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT to be processed
        BUN_GARBAGE_COLLECTOR_LEVEL: "0",
        BUN_FEATURE_FLAG_SYNTHETIC_MEMORY_LIMIT: "1000",
      },
      cwd: String(dir),
      stderr: "pipe",
      stdout: "pipe",
    });

    const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
    expect(stdout).toContain("Success: read 500 bytes as buffer");
    expect(exitCode).toBe(0);
  });
});