Fix bunfig.toml parsing with UTF-8 BOM (#23276)

Fixes #23275

### What does this PR do?

This PR fixes a bug where `bunfig.toml` files starting with a UTF-8 BOM
(byte order mark, `U+FEFF` or bytes `0xEF 0xBB 0xBF`) would fail to
parse with an "Unexpected" error.

The fix uses Bun's existing `File.toSource()` function with
`convert_bom: true` option when loading config files. This properly
detects and strips the BOM before parsing, matching the behavior of
other file readers in Bun (like the JavaScript lexer which treats
`0xFEFF` as whitespace).

**Changes:**
- Modified `src/cli/Arguments.zig` to use `bun.sys.File.toSource()` with
BOM conversion instead of manually reading the file
- Simplified the config loading code by removing intermediate file
handle and buffer logic

### How did you verify your code works?

Added comprehensive regression tests in
`test/regression/issue/23275.test.ts` that verify:
1.  `bunfig.toml` with UTF-8 BOM parses correctly without errors
2.  `bunfig.toml` without BOM still works (regression test)
3.  `bunfig.toml` with BOM and actual config content parses the content
correctly

All three tests pass with the debug build:
```
 3 pass
 0 fail
 11 expect() calls
Ran 3 tests across 1 file. [6.41s]
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
robobun
2025-10-05 17:22:37 -07:00
committed by GitHub
parent 67647c3522
commit f0295ce0a5
3 changed files with 104 additions and 15 deletions

View File

@@ -212,11 +212,11 @@ pub const test_only_params = [_]ParamType{
pub const test_params = test_only_params ++ runtime_params_ ++ transpiler_params_ ++ base_params_;
pub fn loadConfigPath(allocator: std.mem.Allocator, auto_loaded: bool, config_path: [:0]const u8, ctx: Command.Context, comptime cmd: Command.Tag) !void {
var config_file = switch (bun.sys.openA(config_path, bun.O.RDONLY, 0)) {
.result => |fd| fd.stdFile(),
const source = switch (bun.sys.File.toSource(config_path, allocator, .{ .convert_bom = true })) {
.result => |s| s,
.err => |err| {
if (auto_loaded) return;
Output.prettyErrorln("{}\nwhile opening config \"{s}\"", .{
Output.prettyErrorln("{}\nwhile reading config \"{s}\"", .{
err,
config_path,
});
@@ -224,16 +224,6 @@ pub fn loadConfigPath(allocator: std.mem.Allocator, auto_loaded: bool, config_pa
},
};
defer config_file.close();
const contents = config_file.readToEndAlloc(allocator, std.math.maxInt(usize)) catch |err| {
if (auto_loaded) return;
Output.prettyErrorln("<r><red>error<r>: {s} reading config \"{s}\"", .{
@errorName(err),
config_path,
});
Global.exit(1);
};
js_ast.Stmt.Data.Store.create();
js_ast.Expr.Data.Store.create();
defer {
@@ -245,7 +235,7 @@ pub fn loadConfigPath(allocator: std.mem.Allocator, auto_loaded: bool, config_pa
ctx.log.level = original_level;
}
ctx.log.level = logger.Log.Level.warn;
try Bunfig.parse(allocator, &logger.Source.initPathString(bun.asByteSlice(config_path), contents), ctx, cmd);
try Bunfig.parse(allocator, &source, ctx, cmd);
}
fn getHomeConfigPath(buf: *bun.PathBuffer) ?[:0]const u8 {

View File

@@ -8,7 +8,7 @@
".jsBoolean(false)": 0,
".jsBoolean(true)": 0,
".stdDir()": 41,
".stdFile()": 18,
".stdFile()": 17,
"// autofix": 167,
": [^=]+= undefined,$": 256,
"== alloc.ptr": 0,

View File

@@ -0,0 +1,99 @@
// https://github.com/oven-sh/bun/issues/23275
// UTF-8 BOM in bunfig.toml should not cause parsing errors
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
test("bunfig.toml with UTF-8 BOM should parse correctly", async () => {
// UTF-8 BOM is the byte sequence: 0xEF 0xBB 0xBF
const utf8BOM = "\uFEFF";
using dir = tempDir("bunfig-bom", {
"bunfig.toml":
utf8BOM +
`
[install]
exact = true
`,
"index.ts": `console.log("test");`,
"package.json": JSON.stringify({
name: "test-bom",
version: "1.0.0",
}),
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
cwd: String(dir),
env: bunEnv,
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
// Should not have the "Unexpected" error that was reported in the issue
expect(stderr).not.toContain("Unexpected");
expect(stderr).not.toContain("error:");
expect(stdout).toContain("test");
expect(exitCode).toBe(0);
});
test("bunfig.toml without BOM should still work", async () => {
using dir = tempDir("bunfig-no-bom", {
"bunfig.toml": `
[install]
exact = true
`,
"index.ts": `console.log("test");`,
"package.json": JSON.stringify({
name: "test-no-bom",
version: "1.0.0",
}),
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
cwd: String(dir),
env: bunEnv,
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stderr).not.toContain("Unexpected");
expect(stderr).not.toContain("error:");
expect(stdout).toContain("test");
expect(exitCode).toBe(0);
});
test("bunfig.toml with BOM and actual content should parse the content correctly", async () => {
const utf8BOM = "\uFEFF";
using dir = tempDir("bunfig-bom-content", {
"bunfig.toml":
utf8BOM +
`
logLevel = "debug"
[install]
production = true
`,
"index.ts": `console.log("hello");`,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "index.ts"],
cwd: String(dir),
env: bunEnv,
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toContain("hello");
expect(stderr).not.toContain("Unexpected");
expect(exitCode).toBe(0);
});