Files
bun.sh/test/regression/issue/utf16-encoding-crash.test.ts
robobun 7110dc10a4 Fix UTF-16 encoding crash with odd-length byte arrays (#21966)
## Summary
- Fixes a panic ("exact division produced remainder") that occurs when
reading files with an odd number of bytes using utf16le/ucs2 encoding
- The crash happened in `encoding.zig:136` when
`std.mem.bytesAsSlice(u16, input)` was called on a byte slice with odd
length
- Fixed by properly checking for odd-length input and truncating to the
nearest even length

## Test plan
- Added regression tests in
`test/regression/issue/utf16-encoding-crash.test.ts`
- Tests verify that reading files with odd byte counts doesn't crash
- Tests verify correct truncation behavior matches Node.js expectations
- Verified edge cases (0, 1 byte inputs) return empty strings

## Root Cause
The original code checked `if (input.len / 2 == 0)`, which only caught 0-
and 1-byte inputs, but `std.mem.bytesAsSlice(u16, input)` panics on any
odd-length input (3, 5, 7, etc. bytes).

## Fix Details
- Changed condition to check `input.len % 2 != 0` for any odd length
- Truncate odd-length inputs to the nearest even length for valid UTF-16
processing
- Handle edge cases by returning empty string for 0 or 1-byte inputs

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
2025-08-20 00:02:14 -07:00

67 lines
2.0 KiB
TypeScript

import { expect, test } from "bun:test";
import fs from "fs";
import { tempDirWithFiles } from "harness";
/** One fixture: raw file bytes plus the string Node.js yields when decoding them as UTF-16LE. */
type Utf16TestCase = {
  name: string;
  bytes: number[] | Buffer;
  expectedLength: number;
  expectedString: string;
};

// Test cases that verify Bun's UTF-16LE behavior matches Node.js exactly.
// Node.js ignores a dangling trailing byte, so every odd-length input decodes
// as if it had been truncated to the nearest even length.
const testCases: Utf16TestCase[] = [
  {
    name: "0 bytes",
    bytes: [],
    expectedLength: 0,
    expectedString: "",
  },
  {
    name: "1 byte",
    bytes: [0x41],
    expectedLength: 0,
    expectedString: "",
  },
  // Even-length control: nothing is truncated.
  {
    name: "2 bytes",
    bytes: [0x41, 0x00],
    expectedLength: 1,
    expectedString: "A",
  },
  // Small odd lengths: one or more complete code units followed by a dangling byte.
  {
    name: "3 bytes",
    bytes: [0x41, 0x00, 0x42],
    expectedLength: 1,
    expectedString: "A",
  },
  {
    name: "5 bytes",
    bytes: [0x41, 0x00, 0x42, 0x00, 0x43],
    expectedLength: 2,
    expectedString: "AB",
  },
  // It needs to be big enough to trigger the code path that dynamically allocates the arraybuffer,
  // so at least 256 KB.
  {
    name: "large buffer - 256KB + 1",
    bytes: (() => {
      // Zero-filled allocation; only the low byte of each code unit needs writing.
      const buffer = Buffer.alloc(256 * 1024 + 1);
      for (let i = 0; i < buffer.length; i += 2) {
        buffer[i] = 0x41;
      }
      return buffer;
    })(),
    expectedLength: 128 * 1024,
    expectedString: "A".repeat(128 * 1024),
  },
];
test("fs.readFile with utf16le encoding matches Node.js behavior for all byte lengths", () => {
  // Fixture file name for the test case at a given index.
  const fixtureName = (index: number): string => `test-${index}.bin`;

  // Materialize every case's bytes as one file in a fresh temp directory.
  const files: Record<string, Buffer> = Object.fromEntries(
    testCases.map((testCase, index): [string, Buffer] => [fixtureName(index), Buffer.from(testCase.bytes)]),
  );
  const dir = tempDirWithFiles("utf16-node-compatibility", files);

  for (const [index, { expectedLength, expectedString }] of testCases.entries()) {
    const filePath = `${dir}/${fixtureName(index)}`;

    // The read itself must not throw...
    expect(() => {
      fs.readFileSync(filePath, "utf16le");
    }).not.toThrow();

    // ...and the decoded text must be what Node.js produces for the same bytes.
    const decoded = fs.readFileSync(filePath, "utf16le");
    expect(decoded.length).toBe(expectedLength);
    expect(decoded).toBe(expectedString);
  }
});
test("fs.readFile with ucs2 encoding matches utf16le behavior", () => {
  // 3 bytes: an odd-length payload.
  const oddLengthPayload = Buffer.from([0x41, 0x42, 0x43]);
  const dir = tempDirWithFiles("ucs2-compatibility", {
    "test.bin": oddLengthPayload,
  });
  const fixturePath = `${dir}/test.bin`;

  const decodedAsUtf16le = fs.readFileSync(fixturePath, "utf16le");
  const decodedAsUcs2 = fs.readFileSync(fixturePath, "ucs2");

  // Both encoding names must yield the same string for the same file.
  expect(decodedAsUcs2.length).toBe(decodedAsUtf16le.length);
  expect(decodedAsUcs2).toBe(decodedAsUtf16le);
});