Rename some instances of latin1 to cp1252 (#22059)

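In JS, `new TextDecoder("latin1").decode(...)` uses cp1252 (windows-1252). In
Python, latin1 is ISO-8859-1, i.e. half-width utf-16: every byte maps directly
to the code point with the same value (U+0000-U+00FF). The two encodings differ
only in the bytes 0x80-0x9F. In our code, latin1 typically refers to
half-width utf-16 because JavaScriptCore uses that for most strings, but
sometimes it refers to cp1252. Rename the cp1252 functions to be called
cp1252.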

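Also fixes an issue where Buffer.from with utf-16le would sometimes
output the wrong value. In the example below, `\x80` was being mapped through
cp1252 to U+20AC (bytes `ac 20`) instead of staying U+0080 (bytes `80 00`);
`bun-debug` is the build with this fix: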

```js
$> bun -p "Buffer.from('\x80', 'utf-16le')"
<Buffer ac 20>
$> node -p "Buffer.from('\x80', 'utf-16le')"
<Buffer 80 00>
$> bun-debug -p "Buffer.from('\x80', 'utf-16le')"
<Buffer 80 00>
```
This commit is contained in:
pfg
2025-08-28 17:28:38 -07:00
committed by GitHub
parent edea077947
commit c69ed120e9
5 changed files with 58 additions and 24 deletions

View File

@@ -2239,6 +2239,7 @@ pub const convertUTF8toUTF16InBuffer = unicode.convertUTF8toUTF16InBuffer;
pub const convertUTF8toUTF16InBufferZ = unicode.convertUTF8toUTF16InBufferZ;
pub const copyLatin1IntoASCII = unicode.copyLatin1IntoASCII;
pub const copyLatin1IntoUTF16 = unicode.copyLatin1IntoUTF16;
pub const copyCP1252IntoUTF16 = unicode.copyCP1252IntoUTF16;
pub const copyLatin1IntoUTF8 = unicode.copyLatin1IntoUTF8;
pub const copyLatin1IntoUTF8StopOnNonASCII = unicode.copyLatin1IntoUTF8StopOnNonASCII;
pub const copyU16IntoU8 = unicode.copyU16IntoU8;
@@ -2251,7 +2252,7 @@ pub const copyUTF16IntoUTF8WithBufferImpl = unicode.copyUTF16IntoUTF8WithBufferI
pub const decodeCheck = unicode.decodeCheck;
pub const decodeWTF8RuneT = unicode.decodeWTF8RuneT;
pub const decodeWTF8RuneTMultibyte = unicode.decodeWTF8RuneTMultibyte;
pub const elementLengthLatin1IntoUTF16 = unicode.elementLengthLatin1IntoUTF16;
pub const elementLengthCP1252IntoUTF16 = unicode.elementLengthCP1252IntoUTF16;
pub const elementLengthLatin1IntoUTF8 = unicode.elementLengthLatin1IntoUTF8;
pub const elementLengthUTF16IntoUTF8 = unicode.elementLengthUTF16IntoUTF8;
pub const elementLengthUTF8IntoUTF16 = unicode.elementLengthUTF8IntoUTF16;
@@ -2262,9 +2263,9 @@ pub const eqlUtf16 = unicode.eqlUtf16;
pub const isAllASCII = unicode.isAllASCII;
pub const isValidUTF8 = unicode.isValidUTF8;
pub const isValidUTF8WithoutSIMD = unicode.isValidUTF8WithoutSIMD;
pub const latin1ToCodepointAssumeNotASCII = unicode.latin1ToCodepointAssumeNotASCII;
pub const latin1ToCodepointBytesAssumeNotASCII = unicode.latin1ToCodepointBytesAssumeNotASCII;
pub const latin1ToCodepointBytesAssumeNotASCII16 = unicode.latin1ToCodepointBytesAssumeNotASCII16;
pub const cp1252ToCodepointAssumeNotASCII = unicode.cp1252ToCodepointAssumeNotASCII;
pub const cp1252ToCodepointBytesAssumeNotASCII = unicode.cp1252ToCodepointBytesAssumeNotASCII;
pub const cp1252ToCodepointBytesAssumeNotASCII16 = unicode.cp1252ToCodepointBytesAssumeNotASCII16;
pub const literal = unicode.literal;
pub const nonASCIISequenceLength = unicode.nonASCIISequenceLength;
pub const replaceLatin1WithUTF8 = unicode.replaceLatin1WithUTF8;