fix writing UTF-16 with a trailing unpaired surrogate to process.stdout/stderr (#23444)

### What does this PR do?
Fixes `bun -p "process.stderr.write('Hello' +
String.fromCharCode(0xd800))"`.

Also fixes a potential index-out-of-bounds error when there are many
invalid sequences.

This also affects `TextEncoder`.
### How did you verify your code works?
Added tests for edge cases.

---------

Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
This commit is contained in:
Dylan Conway
2025-10-10 03:48:04 -07:00
committed by GitHub
parent 8826b4f5f5
commit 312a86fd43
30 changed files with 765 additions and 253 deletions

View File

@@ -183,7 +183,7 @@ pub const String = extern struct {
pub fn cloneUTF16(bytes: []const u16) String {
if (bytes.len == 0) return String.empty;
if (bun.strings.firstNonASCII16([]const u16, bytes) == null) {
if (bun.strings.firstNonASCII16(bytes) == null) {
return validateRefCount(bun.cpp.BunString__fromUTF16ToLatin1(bytes.ptr, bytes.len));
}
return validateRefCount(bun.cpp.BunString__fromUTF16(bytes.ptr, bytes.len));