mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 18:38:55 +00:00
## Summary
This PR integrates WebKit's text codec implementations into Bun's
TextDecoder, adding support for 24 additional character encodings beyond
the native UTF-8, UTF-16, and Latin1.
Fixes https://github.com/oven-sh/bun/issues/11564
## What's New
### Supported Encodings (24 total)
- **11 single-byte encodings**: IBM866, ISO-8859-3/6/7/8/8-I, KOI8-U,
windows-874/1253/1255/1257
- **7 CJK encodings**: Big5, EUC-JP, ISO-2022-JP, Shift_JIS, EUC-KR,
GBK, GB18030
- **2 special encodings**: x-user-defined, replacement
### Implementation Details
- Integrated WebKit's text codec C++ implementations
- Generated static encoding tables from WHATWG spec (no ICU dependency)
- Created C++ wrapper for Zig/C++ interop
- All encoding aliases are supported (e.g., `sjis` → `shift_jis`)
- Proper whitespace trimming for encoding labels
## Testing
- ✅ Added comprehensive tests for all supported encodings
- ✅ Passes Web Platform Tests for single-byte decoders
- ✅ Passes Web Platform Tests for encoding labels
- ✅ All 2,227 tests pass
## Test Output
```
bun test v1.2.19 (9feaab47)
2207 pass
0 fail
5012 expect() calls
Ran 2207 tests across 1 file. [899.00ms]
```
## Not Included
The following encodings were not added due to ICU data loading
constraints:
- ISO-8859-2, 4, 5, 10, 13, 14, 15, 16
- Windows-1250, 1251, 1254, 1256, 1258
- KOI8-R, macintosh, x-mac-cyrillic
## Example Usage
```javascript
// CJK encodings
const decoder = new TextDecoder("shift_jis");
const bytes = new Uint8Array([0x82, 0xb1, 0x82, 0xf1]);
console.log(decoder.decode(bytes)); // "こん"
// Single-byte encodings
const greekDecoder = new TextDecoder("iso-8859-7");
const greekBytes = new Uint8Array([0xC3, 0xe5, 0xe9, 0xdc]);
console.log(greekDecoder.decode(greekBytes)); // "Γειά"
```
🤖 Generated with [Claude Code](https://claude.ai/code)
---------
Co-authored-by: Claude <claude@anthropic.ai>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1243 lines
20 KiB
TypeScript
1243 lines
20 KiB
TypeScript
/**
|
|
* Web Platform Tests for TextDecoder
|
|
* Based on: https://github.com/web-platform-tests/wpt/tree/master/encoding
|
|
*
|
|
* This file contains tests from:
|
|
* - single-byte-decoder.window.js
|
|
* - textdecoder-labels.any.js
|
|
*/
|
|
|
|
import { describe, expect, test } from "bun:test";
|
|
|
|
// From https://github.com/web-platform-tests/wpt/blob/master/encoding/single-byte-decoder.window.js
|
|
describe("WPT: single-byte-decoder", () => {
|
|
// Single-byte encoding indexes from https://encoding.spec.whatwg.org/indexes.json
|
|
const singleByteIndexes = {
|
|
"IBM866": [
|
|
1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058,
|
|
1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077,
|
|
1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558, 9557,
|
|
9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567, 9562, 9556, 9577, 9574,
|
|
9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616,
|
|
9600, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1025, 1105,
|
|
1028, 1108, 1031, 1111, 1038, 1118, 176, 8729, 183, 8730, 8470, 164, 9632, 160,
|
|
],
|
|
"ISO-8859-3": [
|
|
128,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
133,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
145,
|
|
146,
|
|
147,
|
|
148,
|
|
149,
|
|
150,
|
|
151,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
294,
|
|
728,
|
|
163,
|
|
164,
|
|
null,
|
|
292,
|
|
167,
|
|
168,
|
|
304,
|
|
350,
|
|
286,
|
|
308,
|
|
173,
|
|
null,
|
|
379,
|
|
176,
|
|
295,
|
|
178,
|
|
179,
|
|
180,
|
|
181,
|
|
293,
|
|
183,
|
|
184,
|
|
305,
|
|
351,
|
|
287,
|
|
309,
|
|
189,
|
|
null,
|
|
380,
|
|
192,
|
|
193,
|
|
194,
|
|
null,
|
|
196,
|
|
266,
|
|
264,
|
|
199,
|
|
200,
|
|
201,
|
|
202,
|
|
203,
|
|
204,
|
|
205,
|
|
206,
|
|
207,
|
|
null,
|
|
209,
|
|
210,
|
|
211,
|
|
212,
|
|
288,
|
|
214,
|
|
215,
|
|
284,
|
|
217,
|
|
218,
|
|
219,
|
|
220,
|
|
364,
|
|
348,
|
|
223,
|
|
224,
|
|
225,
|
|
226,
|
|
null,
|
|
228,
|
|
267,
|
|
265,
|
|
231,
|
|
232,
|
|
233,
|
|
234,
|
|
235,
|
|
236,
|
|
237,
|
|
238,
|
|
239,
|
|
null,
|
|
241,
|
|
242,
|
|
243,
|
|
244,
|
|
289,
|
|
246,
|
|
247,
|
|
285,
|
|
249,
|
|
250,
|
|
251,
|
|
252,
|
|
365,
|
|
349,
|
|
729,
|
|
],
|
|
"ISO-8859-6": [
|
|
128,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
133,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
145,
|
|
146,
|
|
147,
|
|
148,
|
|
149,
|
|
150,
|
|
151,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
null,
|
|
null,
|
|
null,
|
|
164,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
1548,
|
|
173,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
1563,
|
|
null,
|
|
null,
|
|
null,
|
|
1567,
|
|
null,
|
|
1569,
|
|
1570,
|
|
1571,
|
|
1572,
|
|
1573,
|
|
1574,
|
|
1575,
|
|
1576,
|
|
1577,
|
|
1578,
|
|
1579,
|
|
1580,
|
|
1581,
|
|
1582,
|
|
1583,
|
|
1584,
|
|
1585,
|
|
1586,
|
|
1587,
|
|
1588,
|
|
1589,
|
|
1590,
|
|
1591,
|
|
1592,
|
|
1593,
|
|
1594,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
1600,
|
|
1601,
|
|
1602,
|
|
1603,
|
|
1604,
|
|
1605,
|
|
1606,
|
|
1607,
|
|
1608,
|
|
1609,
|
|
1610,
|
|
1611,
|
|
1612,
|
|
1613,
|
|
1614,
|
|
1615,
|
|
1616,
|
|
1617,
|
|
1618,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
],
|
|
"ISO-8859-7": [
|
|
128,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
133,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
145,
|
|
146,
|
|
147,
|
|
148,
|
|
149,
|
|
150,
|
|
151,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
8216,
|
|
8217,
|
|
163,
|
|
8364,
|
|
8367,
|
|
166,
|
|
167,
|
|
168,
|
|
169,
|
|
890,
|
|
171,
|
|
172,
|
|
173,
|
|
null,
|
|
8213,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
900,
|
|
901,
|
|
902,
|
|
183,
|
|
904,
|
|
905,
|
|
906,
|
|
187,
|
|
908,
|
|
189,
|
|
910,
|
|
911,
|
|
912,
|
|
913,
|
|
914,
|
|
915,
|
|
916,
|
|
917,
|
|
918,
|
|
919,
|
|
920,
|
|
921,
|
|
922,
|
|
923,
|
|
924,
|
|
925,
|
|
926,
|
|
927,
|
|
928,
|
|
929,
|
|
null,
|
|
931,
|
|
932,
|
|
933,
|
|
934,
|
|
935,
|
|
936,
|
|
937,
|
|
938,
|
|
939,
|
|
940,
|
|
941,
|
|
942,
|
|
943,
|
|
944,
|
|
945,
|
|
946,
|
|
947,
|
|
948,
|
|
949,
|
|
950,
|
|
951,
|
|
952,
|
|
953,
|
|
954,
|
|
955,
|
|
956,
|
|
957,
|
|
958,
|
|
959,
|
|
960,
|
|
961,
|
|
962,
|
|
963,
|
|
964,
|
|
965,
|
|
966,
|
|
967,
|
|
968,
|
|
969,
|
|
970,
|
|
971,
|
|
972,
|
|
973,
|
|
974,
|
|
null,
|
|
],
|
|
"ISO-8859-8": [
|
|
128,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
133,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
145,
|
|
146,
|
|
147,
|
|
148,
|
|
149,
|
|
150,
|
|
151,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
null,
|
|
162,
|
|
163,
|
|
164,
|
|
165,
|
|
166,
|
|
167,
|
|
168,
|
|
169,
|
|
215,
|
|
171,
|
|
172,
|
|
173,
|
|
174,
|
|
175,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
180,
|
|
181,
|
|
182,
|
|
183,
|
|
184,
|
|
185,
|
|
247,
|
|
187,
|
|
188,
|
|
189,
|
|
190,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
8215,
|
|
1488,
|
|
1489,
|
|
1490,
|
|
1491,
|
|
1492,
|
|
1493,
|
|
1494,
|
|
1495,
|
|
1496,
|
|
1497,
|
|
1498,
|
|
1499,
|
|
1500,
|
|
1501,
|
|
1502,
|
|
1503,
|
|
1504,
|
|
1505,
|
|
1506,
|
|
1507,
|
|
1508,
|
|
1509,
|
|
1510,
|
|
1511,
|
|
1512,
|
|
1513,
|
|
1514,
|
|
null,
|
|
null,
|
|
8206,
|
|
8207,
|
|
null,
|
|
],
|
|
"KOI8-U": [
|
|
9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616, 9617, 9618, 9619,
|
|
8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553, 9554, 1105, 1108, 9556, 1110,
|
|
1111, 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566, 9567, 9568, 9569, 1025, 1028, 9571, 1030, 1031, 9574, 9575,
|
|
9576, 9577, 9578, 1168, 1038, 169, 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075, 1093, 1080, 1081, 1082, 1083,
|
|
1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074, 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
|
|
1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056,
|
|
1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066,
|
|
],
|
|
"windows-874": [
|
|
8364,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
8230,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
8216,
|
|
8217,
|
|
8220,
|
|
8221,
|
|
8226,
|
|
8211,
|
|
8212,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
3585,
|
|
3586,
|
|
3587,
|
|
3588,
|
|
3589,
|
|
3590,
|
|
3591,
|
|
3592,
|
|
3593,
|
|
3594,
|
|
3595,
|
|
3596,
|
|
3597,
|
|
3598,
|
|
3599,
|
|
3600,
|
|
3601,
|
|
3602,
|
|
3603,
|
|
3604,
|
|
3605,
|
|
3606,
|
|
3607,
|
|
3608,
|
|
3609,
|
|
3610,
|
|
3611,
|
|
3612,
|
|
3613,
|
|
3614,
|
|
3615,
|
|
3616,
|
|
3617,
|
|
3618,
|
|
3619,
|
|
3620,
|
|
3621,
|
|
3622,
|
|
3623,
|
|
3624,
|
|
3625,
|
|
3626,
|
|
3627,
|
|
3628,
|
|
3629,
|
|
3630,
|
|
3631,
|
|
3632,
|
|
3633,
|
|
3634,
|
|
3635,
|
|
3636,
|
|
3637,
|
|
3638,
|
|
3639,
|
|
3640,
|
|
3641,
|
|
3642,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
3647,
|
|
3648,
|
|
3649,
|
|
3650,
|
|
3651,
|
|
3652,
|
|
3653,
|
|
3654,
|
|
3655,
|
|
3656,
|
|
3657,
|
|
3658,
|
|
3659,
|
|
3660,
|
|
3661,
|
|
3662,
|
|
3663,
|
|
3664,
|
|
3665,
|
|
3666,
|
|
3667,
|
|
3668,
|
|
3669,
|
|
3670,
|
|
3671,
|
|
3672,
|
|
3673,
|
|
3674,
|
|
3675,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
],
|
|
"windows-1252": [
|
|
8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 144, 8216, 8217, 8220,
|
|
8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163, 164, 165, 166, 167, 168,
|
|
169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
|
|
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214,
|
|
215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
|
|
238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
|
|
],
|
|
"windows-1253": [
|
|
8364,
|
|
129,
|
|
8218,
|
|
402,
|
|
8222,
|
|
8230,
|
|
8224,
|
|
8225,
|
|
136,
|
|
8240,
|
|
138,
|
|
8249,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
8216,
|
|
8217,
|
|
8220,
|
|
8221,
|
|
8226,
|
|
8211,
|
|
8212,
|
|
152,
|
|
8482,
|
|
154,
|
|
8250,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
901,
|
|
902,
|
|
163,
|
|
164,
|
|
165,
|
|
166,
|
|
167,
|
|
168,
|
|
169,
|
|
null,
|
|
171,
|
|
172,
|
|
173,
|
|
174,
|
|
8213,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
900,
|
|
181,
|
|
182,
|
|
183,
|
|
904,
|
|
905,
|
|
906,
|
|
187,
|
|
908,
|
|
189,
|
|
910,
|
|
911,
|
|
912,
|
|
913,
|
|
914,
|
|
915,
|
|
916,
|
|
917,
|
|
918,
|
|
919,
|
|
920,
|
|
921,
|
|
922,
|
|
923,
|
|
924,
|
|
925,
|
|
926,
|
|
927,
|
|
928,
|
|
929,
|
|
null,
|
|
931,
|
|
932,
|
|
933,
|
|
934,
|
|
935,
|
|
936,
|
|
937,
|
|
938,
|
|
939,
|
|
940,
|
|
941,
|
|
942,
|
|
943,
|
|
944,
|
|
945,
|
|
946,
|
|
947,
|
|
948,
|
|
949,
|
|
950,
|
|
951,
|
|
952,
|
|
953,
|
|
954,
|
|
955,
|
|
956,
|
|
957,
|
|
958,
|
|
959,
|
|
960,
|
|
961,
|
|
962,
|
|
963,
|
|
964,
|
|
965,
|
|
966,
|
|
967,
|
|
968,
|
|
969,
|
|
970,
|
|
971,
|
|
972,
|
|
973,
|
|
974,
|
|
null,
|
|
],
|
|
"windows-1255": [
|
|
8364,
|
|
129,
|
|
8218,
|
|
402,
|
|
8222,
|
|
8230,
|
|
8224,
|
|
8225,
|
|
710,
|
|
8240,
|
|
138,
|
|
8249,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
8216,
|
|
8217,
|
|
8220,
|
|
8221,
|
|
8226,
|
|
8211,
|
|
8212,
|
|
732,
|
|
8482,
|
|
154,
|
|
8250,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
161,
|
|
162,
|
|
163,
|
|
8362,
|
|
165,
|
|
166,
|
|
167,
|
|
168,
|
|
169,
|
|
215,
|
|
171,
|
|
172,
|
|
173,
|
|
174,
|
|
175,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
180,
|
|
181,
|
|
182,
|
|
183,
|
|
184,
|
|
185,
|
|
247,
|
|
187,
|
|
188,
|
|
189,
|
|
190,
|
|
191,
|
|
1456,
|
|
1457,
|
|
1458,
|
|
1459,
|
|
1460,
|
|
1461,
|
|
1462,
|
|
1463,
|
|
1464,
|
|
1465,
|
|
1466,
|
|
1467,
|
|
1468,
|
|
1469,
|
|
1470,
|
|
1471,
|
|
1472,
|
|
1473,
|
|
1474,
|
|
1475,
|
|
1520,
|
|
1521,
|
|
1522,
|
|
1523,
|
|
1524,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
1488,
|
|
1489,
|
|
1490,
|
|
1491,
|
|
1492,
|
|
1493,
|
|
1494,
|
|
1495,
|
|
1496,
|
|
1497,
|
|
1498,
|
|
1499,
|
|
1500,
|
|
1501,
|
|
1502,
|
|
1503,
|
|
1504,
|
|
1505,
|
|
1506,
|
|
1507,
|
|
1508,
|
|
1509,
|
|
1510,
|
|
1511,
|
|
1512,
|
|
1513,
|
|
1514,
|
|
null,
|
|
null,
|
|
8206,
|
|
8207,
|
|
null,
|
|
],
|
|
"windows-1257": [
|
|
8364,
|
|
129,
|
|
8218,
|
|
131,
|
|
8222,
|
|
8230,
|
|
8224,
|
|
8225,
|
|
136,
|
|
8240,
|
|
138,
|
|
8249,
|
|
140,
|
|
168,
|
|
711,
|
|
184,
|
|
144,
|
|
8216,
|
|
8217,
|
|
8220,
|
|
8221,
|
|
8226,
|
|
8211,
|
|
8212,
|
|
152,
|
|
8482,
|
|
154,
|
|
8250,
|
|
156,
|
|
175,
|
|
731,
|
|
159,
|
|
160,
|
|
null,
|
|
162,
|
|
163,
|
|
164,
|
|
null,
|
|
166,
|
|
167,
|
|
216,
|
|
169,
|
|
342,
|
|
171,
|
|
172,
|
|
173,
|
|
174,
|
|
198,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
180,
|
|
181,
|
|
182,
|
|
183,
|
|
248,
|
|
185,
|
|
343,
|
|
187,
|
|
188,
|
|
189,
|
|
190,
|
|
230,
|
|
260,
|
|
302,
|
|
256,
|
|
262,
|
|
196,
|
|
197,
|
|
280,
|
|
274,
|
|
268,
|
|
201,
|
|
377,
|
|
278,
|
|
290,
|
|
310,
|
|
298,
|
|
315,
|
|
352,
|
|
323,
|
|
325,
|
|
211,
|
|
332,
|
|
213,
|
|
214,
|
|
215,
|
|
370,
|
|
321,
|
|
346,
|
|
362,
|
|
220,
|
|
379,
|
|
381,
|
|
223,
|
|
261,
|
|
303,
|
|
257,
|
|
263,
|
|
228,
|
|
229,
|
|
281,
|
|
275,
|
|
269,
|
|
233,
|
|
378,
|
|
279,
|
|
291,
|
|
311,
|
|
299,
|
|
316,
|
|
353,
|
|
324,
|
|
326,
|
|
243,
|
|
333,
|
|
245,
|
|
246,
|
|
247,
|
|
371,
|
|
322,
|
|
347,
|
|
363,
|
|
252,
|
|
380,
|
|
382,
|
|
729,
|
|
],
|
|
};
|
|
|
|
// Supported encodings (those not removed due to ICU removal)
|
|
const supportedEncodings = [
|
|
"IBM866",
|
|
"ISO-8859-3",
|
|
"ISO-8859-6",
|
|
"ISO-8859-7",
|
|
"ISO-8859-8",
|
|
"KOI8-U",
|
|
"windows-874",
|
|
"windows-1252",
|
|
"windows-1253",
|
|
"windows-1255",
|
|
"windows-1257",
|
|
];
|
|
|
|
// Create test buffer with all bytes 0-255
|
|
const buffer = new ArrayBuffer(256);
|
|
const view = new Uint8Array(buffer);
|
|
for (let i = 0; i < 256; i++) {
|
|
view[i] = i;
|
|
}
|
|
|
|
for (const encoding of supportedEncodings) {
|
|
test(`${encoding} decodes all bytes correctly`, () => {
|
|
const decoder = new TextDecoder(encoding);
|
|
const decoded = decoder.decode(view);
|
|
|
|
// Check each character
|
|
for (let i = 0; i < 256; i++) {
|
|
const actual = decoded.charCodeAt(i);
|
|
let expected;
|
|
|
|
if (i < 0x80) {
|
|
expected = i; // ASCII range
|
|
} else {
|
|
expected = singleByteIndexes[encoding][i - 0x80];
|
|
if (expected == null) {
|
|
expected = 0xfffd; // Replacement character
|
|
}
|
|
}
|
|
|
|
expect(actual).toBe(expected);
|
|
}
|
|
});
|
|
}
|
|
});
|
|
|
|
// From https://github.com/web-platform-tests/wpt/blob/master/encoding/textdecoder-labels.any.js
|
|
describe("WPT: textdecoder-labels", () => {
|
|
const whitespace = [" ", "\t", "\n", "\f", "\r"];
|
|
|
|
const encodingLabels: Record<string, string[]> = {
|
|
"utf-8": ["utf-8", "utf8", "unicode-1-1-utf-8"],
|
|
"ibm866": ["ibm866", "866", "cp866", "csibm866"],
|
|
"iso-8859-3": ["iso-8859-3", "iso8859-3", "iso_8859-3", "latin3", "iso-ir-109", "l3", "csisolatin3"],
|
|
"iso-8859-6": ["iso-8859-6", "iso8859-6", "iso_8859-6", "arabic", "asmo-708", "ecma-114", "iso-ir-127"],
|
|
"iso-8859-7": [
|
|
"iso-8859-7",
|
|
"iso8859-7",
|
|
"iso_8859-7",
|
|
"greek",
|
|
"greek8",
|
|
"iso-ir-126",
|
|
"elot_928",
|
|
"ecma-118",
|
|
"csisolatingreek",
|
|
],
|
|
"iso-8859-8": ["iso-8859-8", "iso8859-8", "iso_8859-8", "hebrew", "iso-ir-138", "csisolatinhebrew", "visual"],
|
|
"iso-8859-8-i": ["iso-8859-8-i", "csiso88598i", "logical"],
|
|
"koi8-u": ["koi8-u", "koi8-ru"],
|
|
"windows-874": ["windows-874", "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620"],
|
|
"windows-1252": [
|
|
"windows-1252",
|
|
"cp1252",
|
|
"x-cp1252",
|
|
"ansi_x3.4-1968",
|
|
"ascii",
|
|
"cp819",
|
|
"csisolatin1",
|
|
"ibm819",
|
|
"iso-8859-1",
|
|
"iso-ir-100",
|
|
"iso8859-1",
|
|
"iso88591",
|
|
"iso_8859-1",
|
|
"iso_8859-1:1987",
|
|
"l1",
|
|
"latin1",
|
|
"us-ascii",
|
|
],
|
|
"windows-1253": ["windows-1253", "cp1253", "x-cp1253"],
|
|
"windows-1255": ["windows-1255", "cp1255", "x-cp1255"],
|
|
"windows-1257": ["windows-1257", "cp1257", "x-cp1257"],
|
|
"utf-16be": ["utf-16be"],
|
|
"utf-16le": ["utf-16le", "utf-16", "csunicode", "iso-10646-ucs-2", "ucs-2", "unicode", "unicodefeff"],
|
|
"x-user-defined": ["x-user-defined"],
|
|
"replacement": ["replacement"],
|
|
"big5": ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
|
|
"euc-jp": ["euc-jp", "cseucpkdfmtjapanese", "x-euc-jp"],
|
|
"iso-2022-jp": ["iso-2022-jp", "csiso2022jp"],
|
|
"shift_jis": ["shift_jis", "shift-jis", "csshiftjis", "ms932", "ms_kanji", "sjis", "windows-31j", "x-sjis"],
|
|
"euc-kr": [
|
|
"euc-kr",
|
|
"cseuckr",
|
|
"csksc56011987",
|
|
"iso-ir-149",
|
|
"korean",
|
|
"ks_c_5601-1987",
|
|
"ks_c_5601-1989",
|
|
"ksc5601",
|
|
"ksc_5601",
|
|
"windows-949",
|
|
],
|
|
"gbk": ["gbk", "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "iso-ir-58", "x-gbk"],
|
|
"gb18030": ["gb18030"],
|
|
};
|
|
|
|
for (const [canonical, labels] of Object.entries(encodingLabels)) {
|
|
for (const label of labels) {
|
|
describe(`${label} => ${canonical}`, () => {
|
|
test("exact label", () => {
|
|
const decoder = new TextDecoder(label);
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
|
|
test("uppercase label", () => {
|
|
const decoder = new TextDecoder(label.toUpperCase());
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
|
|
test("mixed case label", () => {
|
|
const mixedCase = label
|
|
.split("")
|
|
.map((c, i) => (i % 2 ? c.toUpperCase() : c))
|
|
.join("");
|
|
const decoder = new TextDecoder(mixedCase);
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
|
|
for (const ws of whitespace) {
|
|
test(`with leading whitespace '${ws === "\n" ? "\\n" : ws === "\r" ? "\\r" : ws === "\t" ? "\\t" : ws === "\f" ? "\\f" : ws}'`, () => {
|
|
const decoder = new TextDecoder(ws + label);
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
|
|
test(`with trailing whitespace '${ws === "\n" ? "\\n" : ws === "\r" ? "\\r" : ws === "\t" ? "\\t" : ws === "\f" ? "\\f" : ws}'`, () => {
|
|
const decoder = new TextDecoder(label + ws);
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
|
|
test(`with surrounding whitespace '${ws === "\n" ? "\\n" : ws === "\r" ? "\\r" : ws === "\t" ? "\\t" : ws === "\f" ? "\\f" : ws}'`, () => {
|
|
const decoder = new TextDecoder(ws + label + ws);
|
|
expect(decoder.encoding).toBe(canonical);
|
|
});
|
|
}
|
|
});
|
|
}
|
|
}
|
|
});
|