Files
bun.sh/test/js/node/process/process-stdio-invalid-utf16.test.ts
Dylan Conway 312a86fd43 fix writing UTF-16 with a trailing unpaired surrogate to process.stdout/stderr (#23444)
### What does this PR do?
Fixes `bun -p "process.stderr.write('Hello' +
String.fromCharCode(0xd800))"`.

Also fixes potential index out of bounds if there are many invalid
sequences.

This also affects `TextEncoder`.
### How did you verify your code works?
Added tests for edge cases

---------

Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
2025-10-10 03:48:04 -07:00

406 lines
12 KiB
TypeScript
Raw Permalink Blame History

import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
/**
 * U+FFFD REPLACEMENT CHARACTER. Written as an escape so the expected values
 * do not depend on how this source file itself is encoded or transported.
 */
const REPLACEMENT = "\uFFFD";

/** Captured output and exit status of one child `bun` run. */
interface StdioResult {
  stdout: string;
  stderr: string;
  exitCode: number;
}

/**
 * Runs a child `bun` process that writes each expression in `writes` to
 * `process.<stream>`, following every write with a separate `"\n"` write.
 *
 * @param stream Name of the `process` stream to write to ("stdout" or "stderr").
 * @param writes JavaScript source expressions; each is evaluated in the child
 *               and passed to `process.<stream>.write(...)`.
 * @returns The child's captured stdout, stderr and exit code.
 */
async function runWrites(stream: string, writes: readonly string[]): Promise<StdioResult> {
  const lines = writes.flatMap(expr => [`process.${stream}.write(${expr});`, `process.${stream}.write("\\n");`]);
  if (stream !== "stdout") {
    // When the stream under test is not stdout, the child also prints a marker
    // to stdout; the assertions check that stdout contains exactly that marker.
    lines.push('console.log("Done");');
  }

  using dir = tempDir("stdio-utf16", {
    "test.js": lines.join("\n") + "\n",
  });
  await using proc = Bun.spawn({
    cmd: [bunExe(), "test.js"],
    env: bunEnv,
    cwd: String(dir),
    stdout: "pipe",
    stderr: "pipe",
  });
  const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
  return { stdout, stderr, exitCode };
}

/**
 * Asserts that the child exited with code 0 and that the stream under test
 * received exactly `expected`. For any stream other than stdout, also asserts
 * that stdout holds only the "Done\n" marker.
 */
function expectStreamOutput(stream: string, result: StdioResult, expected: string): void {
  expect(result.exitCode).toBe(0);
  if (stream === "stdout") {
    expect(result.stdout).toBe(expected);
  } else {
    expect(result.stdout).toBe("Done\n");
    expect(result.stderr).toBe(expected);
  }
}

// Every unpaired UTF-16 surrogate written to process.stdout / process.stderr is
// expected to come out as a single U+FFFD; valid surrogate pairs must survive.
describe.concurrent.each(["stdout", "stderr"])("process.%s.write with invalid UTF-16", stream => {
  test("single unpaired high surrogate (D800)", async () => {
    const result = await runWrites(stream, ["String.fromCharCode(0xD800)"]);
    expectStreamOutput(stream, result, `${REPLACEMENT}\n`);
  });

  test("single unpaired low surrogate (DC00)", async () => {
    const result = await runWrites(stream, ["String.fromCharCode(0xDC00)"]);
    expectStreamOutput(stream, result, `${REPLACEMENT}\n`);
  });

  test("trailing unpaired high surrogate should not duplicate output", async () => {
    // This was the main bug: strings ending with high surrogates (D800-DBFF)
    // would duplicate the output ~32 times
    const result = await runWrites(stream, [
      '"Help" + String.fromCharCode(0xD800)',
      '"Test" + String.fromCharCode(0xDBFF)',
    ]);
    expectStreamOutput(stream, result, `Help${REPLACEMENT}\nTest${REPLACEMENT}\n`);

    // Also verify no duplication
    const output = stream === "stdout" ? result.stdout : result.stderr;
    expect((output.match(/Help/g) ?? []).length).toBe(1);
    expect((output.match(/Test/g) ?? []).length).toBe(1);
  });

  test("trailing unpaired low surrogate", async () => {
    const result = await runWrites(stream, [
      '"Hello" + String.fromCharCode(0xDC00)',
      '"World" + String.fromCharCode(0xDFFF)',
    ]);
    expectStreamOutput(stream, result, `Hello${REPLACEMENT}\nWorld${REPLACEMENT}\n`);
  });

  test("leading unpaired surrogates", async () => {
    const result = await runWrites(stream, [
      'String.fromCharCode(0xD800) + "Hello"',
      'String.fromCharCode(0xDC00) + "World"',
    ]);
    expectStreamOutput(stream, result, `${REPLACEMENT}Hello\n${REPLACEMENT}World\n`);
  });

  test("unpaired surrogates at both ends", async () => {
    const result = await runWrites(stream, [
      'String.fromCharCode(0xD800) + "Middle" + String.fromCharCode(0xDC00)',
      'String.fromCharCode(0xDC00) + "Text" + String.fromCharCode(0xD800)',
    ]);
    expectStreamOutput(
      stream,
      result,
      `${REPLACEMENT}Middle${REPLACEMENT}\n${REPLACEMENT}Text${REPLACEMENT}\n`,
    );
  });

  test("multiple unpaired high surrogates", async () => {
    const result = await runWrites(stream, [
      // Multiple high surrogates only
      "String.fromCharCode(0xD800, 0xD801, 0xD802)",
      // Text with multiple trailing high surrogates
      '"Test" + String.fromCharCode(0xD800, 0xD801, 0xD802)',
    ]);
    expectStreamOutput(stream, result, `${REPLACEMENT.repeat(3)}\nTest${REPLACEMENT.repeat(3)}\n`);
  });

  test("multiple unpaired low surrogates", async () => {
    const result = await runWrites(stream, [
      // Multiple low surrogates only
      "String.fromCharCode(0xDC00, 0xDC01, 0xDC02)",
      // Text with multiple trailing low surrogates
      '"Test" + String.fromCharCode(0xDC00, 0xDC01, 0xDC02)',
    ]);
    expectStreamOutput(stream, result, `${REPLACEMENT.repeat(3)}\nTest${REPLACEMENT.repeat(3)}\n`);
  });

  test("valid surrogate pairs are preserved", async () => {
    // U+1D11E MUSICAL SYMBOL G CLEF, i.e. the surrogate pair D834 DD1E.
    const clef = "\u{1D11E}";
    const result = await runWrites(stream, [
      // Valid surrogate pair
      "String.fromCharCode(0xD834, 0xDD1E)",
      // Valid pair with unpaired surrogates on either side
      "String.fromCharCode(0xD800) + String.fromCharCode(0xD834, 0xDD1E) + String.fromCharCode(0xDC00)",
    ]);
    expectStreamOutput(stream, result, `${clef}\n${REPLACEMENT}${clef}${REPLACEMENT}\n`);
  });

  test("surrogate pair combinations", async () => {
    const result = await runWrites(stream, [
      // D800,D801,DC00: D800 is unpaired, D801+DC00 forms valid pair
      "String.fromCharCode(0xD800, 0xD801, 0xDC00)",
      // DC00,D800,DC01,D801: DC00 unpaired, D800+DC01 valid, D801 unpaired
      "String.fromCharCode(0xDC00, 0xD800, 0xDC01, 0xD801)",
      // Two high surrogates (both unpaired)
      "String.fromCharCode(0xD800, 0xD801)",
      // Two low surrogates (both unpaired)
      "String.fromCharCode(0xDC00, 0xDC01)",
    ]);
    const expectedOutput = [
      REPLACEMENT + String.fromCharCode(0xd801, 0xdc00),
      REPLACEMENT + String.fromCharCode(0xd800, 0xdc01) + REPLACEMENT,
      REPLACEMENT.repeat(2),
      REPLACEMENT.repeat(2),
    ]
      .map(line => line + "\n")
      .join("");
    expectStreamOutput(stream, result, expectedOutput);
  });

  test("large strings with trailing unpaired surrogates", async () => {
    // Large string to test buffer boundaries
    const result = await runWrites(stream, ['"A".repeat(10000) + String.fromCharCode(0xD800)']);
    expect(result.exitCode).toBe(0);

    const output = stream === "stdout" ? result.stdout : result.stderr;
    // Should be exactly 10000 A's plus one replacement character
    const aCount = (output.match(/A/g) ?? []).length;
    expect(aCount).toBe(10000);
    expect(output.endsWith(`${REPLACEMENT}\n`)).toBe(true);
    if (stream === "stderr") {
      expect(result.stdout).toBe("Done\n");
    }
  });

  test("empty string and edge cases", async () => {
    const result = await runWrites(stream, [
      // Empty string
      '""',
      // Single char before/after unpaired
      '"A" + String.fromCharCode(0xD800)',
      'String.fromCharCode(0xD800) + "B"',
    ]);
    expectStreamOutput(stream, result, `\nA${REPLACEMENT}\n${REPLACEMENT}B\n`);
  });
});