Compare commits

...

1 Commits

Author SHA1 Message Date
Claude Bot
3e36b88ff1 fix(transpiler): preserve null bytes in tagged template literals
The UnsignedCodepointIterator used `minInt(u32) = 0` as the error
sentinel for detecting invalid multibyte UTF-8 sequences. This collided
with the valid null byte codepoint (U+0000), causing null bytes to be
misidentified as decode errors and replaced with U+FFFD. The printer
then emitted the literal 6-character string `\uFFFD` in raw template
literals instead of preserving the null byte.

Fix: use `maxInt` instead of `minInt` as the error sentinel. For u32,
maxInt is 0xFFFFFFFF which is well beyond the valid Unicode range
(max 0x10FFFF) and can never collide with a valid codepoint.

Closes #27553

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-28 15:55:18 +00:00
2 changed files with 80 additions and 1 deletions

View File

@@ -101,7 +101,7 @@ pub fn NewCodePointIterator(comptime CodePointType_: type, comptime zeroValue: c
}
const cp_len = wtf8ByteSequenceLength(it.bytes[pos]);
-const error_char = comptime std.math.minInt(CodePointType);
+const error_char = comptime std.math.maxInt(CodePointType);
const codepoint = @as(
CodePointType,

View File

@@ -0,0 +1,79 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
test("String.raw preserves null bytes in tagged template literals", async () => {
// Create a source file with a literal null byte (0x00) inside a tagged template literal.
// The null byte must be an actual byte in the source, not an escape sequence.
const source = Buffer.concat([
Buffer.from("const s = String.raw`"),
Buffer.from([0x00]),
Buffer.from("`;\nconsole.log(s.length);\nconsole.log(s.charCodeAt(0));\n"),
]);
using dir = tempDir("issue-27553", {
"test.js": source,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "test.js"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toBe("1\n0\n");
expect(exitCode).toBe(0);
});
test("null bytes in untagged template literals are preserved", async () => {
const source = Buffer.concat([
Buffer.from("const s = `"),
Buffer.from([0x00]),
Buffer.from("`;\nconsole.log(s.length);\nconsole.log(s.charCodeAt(0));\n"),
]);
using dir = tempDir("issue-27553-untagged", {
"test.js": source,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "test.js"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toBe("1\n0\n");
expect(exitCode).toBe(0);
});
test("null bytes in String.raw with surrounding content", async () => {
const source = Buffer.concat([
Buffer.from("const s = String.raw`hello"),
Buffer.from([0x00]),
Buffer.from("world`;\nconsole.log(s.length);\nconsole.log(s.charCodeAt(5));\n"),
]);
using dir = tempDir("issue-27553-embedded", {
"test.js": source,
});
await using proc = Bun.spawn({
cmd: [bunExe(), "test.js"],
env: bunEnv,
cwd: String(dir),
stdout: "pipe",
stderr: "pipe",
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stdout).toBe("11\n0\n");
expect(exitCode).toBe(0);
});