Move TextDecoder, TextEncoderStreamEncoder, TextEncoder, EncodingLabel into separate files (#18430)

This commit is contained in:
Jarred Sumner
2025-03-24 17:10:48 -07:00
committed by GitHub
parent 438d8555c6
commit 50eaea19cb
5 changed files with 998 additions and 958 deletions

View File

@@ -0,0 +1,160 @@
/// Encoding names as listed by the WHATWG Encoding Standard.
/// https://encoding.spec.whatwg.org/encodings.json
///
/// Only a subset of these can currently be produced by `which`:
/// UTF-8, UTF-16LE, UTF-16BE and windows-1252 (latin1).
pub const EncodingLabel = enum {
    @"UTF-8",
    IBM866,
    @"ISO-8859-2",
    @"ISO-8859-3",
    @"ISO-8859-4",
    @"ISO-8859-5",
    @"ISO-8859-6",
    @"ISO-8859-7",
    @"ISO-8859-8",
    @"ISO-8859-8-I",
    @"ISO-8859-10",
    @"ISO-8859-13",
    @"ISO-8859-14",
    @"ISO-8859-15",
    @"ISO-8859-16",
    @"KOI8-R",
    @"KOI8-U",
    macintosh,
    @"windows-874",
    @"windows-1250",
    @"windows-1251",
    /// Also known as
    /// - ASCII
    /// - latin1
    @"windows-1252",
    @"windows-1253",
    @"windows-1254",
    @"windows-1255",
    @"windows-1256",
    @"windows-1257",
    @"windows-1258",
    @"x-mac-cyrillic",
    Big5,
    @"EUC-JP",
    @"ISO-2022-JP",
    Shift_JIS,
    @"EUC-KR",
    @"UTF-16BE",
    @"UTF-16LE",
    @"x-user-defined",

    pub const Map = std.enums.EnumMap(EncodingLabel, string);

    /// Canonical lowercase name of every encoding that `which` can return.
    /// All other encodings map to the empty string.
    pub const label: Map = brk: {
        var map = Map.initFull("");
        map.put(EncodingLabel.@"UTF-8", "utf-8");
        map.put(EncodingLabel.@"UTF-16LE", "utf-16le");
        // `which` can return UTF-16BE, so it needs a canonical name as well.
        map.put(EncodingLabel.@"UTF-16BE", "utf-16be");
        map.put(EncodingLabel.@"windows-1252", "windows-1252");
        break :brk map;
    };

    // The three lists below enumerate the aliases accepted for each encoding.
    // They are not referenced inside this enum; `which` spells out the same
    // strings directly in its switch arms.
    const utf16_names = [_]string{
        "ucs-2",
        "utf-16",
        "unicode",
        "utf-16le",
        "csunicode",
        "unicodefeff",
        "iso-10646-ucs-2",
    };

    const utf8_names = [_]string{
        "utf8",
        "utf-8",
        "unicode11utf8",
        "unicode20utf8",
        "x-unicode20utf8",
        "unicode-1-1-utf-8",
    };

    const latin1_names = [_]string{
        "l1",
        "ascii",
        "cp819",
        "cp1252",
        "ibm819",
        "latin1",
        "iso88591",
        "us-ascii",
        "x-cp1252",
        "iso8859-1",
        "iso_8859-1",
        "iso-8859-1",
        "iso-ir-100",
        "csisolatin1",
        "windows-1252",
        "ansi_x3.4-1968",
        "iso_8859-1:1987",
    };

    /// Shorthand for the encoding that all latin1/ASCII aliases resolve to.
    pub const latin1 = EncodingLabel.@"windows-1252";

    /// Resolves a user-supplied encoding label to an `EncodingLabel`.
    ///
    /// Leading and trailing ASCII whitespace (space, tab, LF, FF, CR) is
    /// removed first, as required by the "get an encoding" algorithm of the
    /// Encoding Standard. Returns `null` when the remaining text is not one
    /// of the recognised UTF-8, UTF-16LE, UTF-16BE or windows-1252 labels.
    pub fn which(input_: string) ?EncodingLabel {
        // "\x0c" is FORM FEED, which is also ASCII whitespace per the spec.
        const input = strings.trim(input_, " \t\r\n\x0c");
        const ExactMatcher = strings.ExactSizeMatcher;
        const Eight = ExactMatcher(8);
        const Sixteen = ExactMatcher(16);

        // Labels are bucketed by length so that each bucket can be compared
        // through a fixed-size matcher (8 or 16 bytes).
        return switch (input.len) {
            // No recognised label is shorter than two bytes.
            1, 0 => null,
            2...8 => switch (Eight.matchLower(input)) {
                Eight.case("l1"),
                Eight.case("ascii"),
                Eight.case("cp819"),
                Eight.case("cp1252"),
                Eight.case("ibm819"),
                Eight.case("latin1"),
                Eight.case("iso88591"),
                Eight.case("us-ascii"),
                Eight.case("x-cp1252"),
                => EncodingLabel.latin1,
                Eight.case("ucs-2"),
                Eight.case("utf-16"),
                Eight.case("unicode"),
                Eight.case("utf-16le"),
                => EncodingLabel.@"UTF-16LE",
                Eight.case("utf-16be"),
                => EncodingLabel.@"UTF-16BE",
                Eight.case("utf8"), Eight.case("utf-8") => EncodingLabel.@"UTF-8",
                else => null,
            },
            9...16 => switch (Sixteen.matchLower(input)) {
                Sixteen.case("iso8859-1"),
                Sixteen.case("iso_8859-1"),
                Sixteen.case("iso-8859-1"),
                Sixteen.case("iso-ir-100"),
                Sixteen.case("csisolatin1"),
                Sixteen.case("windows-1252"),
                Sixteen.case("ansi_x3.4-1968"),
                Sixteen.case("iso_8859-1:1987"),
                => EncodingLabel.latin1,
                Sixteen.case("unicode11utf8"),
                Sixteen.case("unicode20utf8"),
                Sixteen.case("x-unicode20utf8"),
                => EncodingLabel.@"UTF-8",
                Sixteen.case("csunicode"),
                Sixteen.case("unicodefeff"),
                Sixteen.case("iso-10646-ucs-2"),
                => EncodingLabel.@"UTF-16LE",
                // The Encoding Standard lists this as the second UTF-16BE label.
                Sixteen.case("unicodefffe"),
                => EncodingLabel.@"UTF-16BE",
                else => null,
            },
            // The only recognised label longer than 16 bytes (it is 17 long).
            else => if (strings.eqlCaseInsensitiveASCII(input, "unicode-1-1-utf-8", true))
                EncodingLabel.@"UTF-8"
            else
                null,
        };
    }
};
const std = @import("std");
const bun = @import("root").bun;
const encoding = @import("encoding.zig");
const string = []const u8;
const strings = bun.strings;