Files
bun.sh/test/regression/issue/23275.test.ts
robobun f0295ce0a5 Fix bunfig.toml parsing with UTF-8 BOM (#23276)
Fixes #23275

### What does this PR do?

This PR fixes a bug where `bunfig.toml` files starting with a UTF-8 BOM
(byte order mark, `U+FEFF` or bytes `0xEF 0xBB 0xBF`) would fail to
parse with an "Unexpected" error.

The fix uses Bun's existing `File.toSource()` function with the
`convert_bom: true` option when loading config files. This properly
detects and strips the BOM before parsing, matching the behavior of
other file readers in Bun (like the JavaScript lexer which treats
`0xFEFF` as whitespace).

**Changes:**
- Modified `src/cli/Arguments.zig` to use `bun.sys.File.toSource()` with
BOM conversion instead of manually reading the file
- Simplified the config loading code by removing the intermediate file
handle and buffer logic

### How did you verify your code works?

Added comprehensive regression tests in
`test/regression/issue/23275.test.ts` that verify:
1.  `bunfig.toml` with UTF-8 BOM parses correctly without errors
2.  `bunfig.toml` without BOM still works (regression test)
3.  `bunfig.toml` with BOM and actual config content parses the content
correctly

All three tests pass with the debug build:
```
 3 pass
 0 fail
 11 expect() calls
Ran 3 tests across 1 file. [6.41s]
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-10-05 17:22:37 -07:00

100 lines
2.4 KiB
TypeScript

// https://github.com/oven-sh/bun/issues/23275
// UTF-8 BOM in bunfig.toml should not cause parsing errors
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
test("bunfig.toml with UTF-8 BOM should parse correctly", async () => {
  // U+FEFF is the byte order mark; encoded as UTF-8 it is 0xEF 0xBB 0xBF.
  const bom = "\uFEFF";

  // Config file whose very first character is the BOM.
  const bunfigWithBom = `${bom}
[install]
exact = true
`;
  const manifest = JSON.stringify({
    name: "test-bom",
    version: "1.0.0",
  });

  using dir = tempDir("bunfig-bom", {
    "bunfig.toml": bunfigWithBom,
    "index.ts": `console.log("test");`,
    "package.json": manifest,
  });

  // Run the script from inside the fixture directory, capturing both output streams.
  await using proc = Bun.spawn({
    cmd: [bunExe(), "index.ts"],
    cwd: String(dir),
    env: bunEnv,
    stdout: "pipe",
    stderr: "pipe",
  });

  const [out, err, code] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

  // The issue reported an "Unexpected" parse error; none may appear on stderr.
  expect(err).not.toContain("Unexpected");
  expect(err).not.toContain("error:");
  // The script itself must have run to completion.
  expect(out).toContain("test");
  expect(code).toBe(0);
});
test("bunfig.toml without BOM should still work", async () => {
  // Control case: the same [install] config, but with no BOM in front of it.
  const plainBunfig = `
[install]
exact = true
`;
  const manifest = JSON.stringify({
    name: "test-no-bom",
    version: "1.0.0",
  });

  using dir = tempDir("bunfig-no-bom", {
    "bunfig.toml": plainBunfig,
    "index.ts": `console.log("test");`,
    "package.json": manifest,
  });

  // Run the script from inside the fixture directory, capturing both output streams.
  await using proc = Bun.spawn({
    cmd: [bunExe(), "index.ts"],
    cwd: String(dir),
    env: bunEnv,
    stdout: "pipe",
    stderr: "pipe",
  });

  const [out, err, code] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

  // No parse error may be reported, and the script output must be present.
  expect(err).not.toContain("Unexpected");
  expect(err).not.toContain("error:");
  expect(out).toContain("test");
  expect(code).toBe(0);
});
test("bunfig.toml with BOM and actual content should parse the content correctly", async () => {
  const bom = "\uFEFF";

  // BOM followed by a top-level key and an [install] table.
  const bunfigWithBom = `${bom}
logLevel = "debug"
[install]
production = true
`;

  // This fixture has no package.json: only the config and the script.
  using dir = tempDir("bunfig-bom-content", {
    "bunfig.toml": bunfigWithBom,
    "index.ts": `console.log("hello");`,
  });

  // Run the script from inside the fixture directory, capturing both output streams.
  await using proc = Bun.spawn({
    cmd: [bunExe(), "index.ts"],
    cwd: String(dir),
    env: bunEnv,
    stdout: "pipe",
    stderr: "pipe",
  });

  const [out, err, code] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

  // The script must print its output, with no "Unexpected" parse error and a clean exit.
  expect(out).toContain("hello");
  expect(err).not.toContain("Unexpected");
  expect(code).toBe(0);
});