mirror of
https://github.com/oven-sh/bun
synced 2026-02-12 11:59:00 +00:00
Move TextDecoder, TextEncoderStreamEncoder, TextEncoder, EncodingLabel into separate files (#18430)
This commit is contained in:
160
src/bun.js/webcore/EncodingLabel.zig
Normal file
160
src/bun.js/webcore/EncodingLabel.zig
Normal file
@@ -0,0 +1,160 @@
|
||||
/// Encoding names from the WHATWG Encoding Standard.
/// https://encoding.spec.whatwg.org/encodings.json
///
/// Use `which` to resolve a user-supplied label (for example the argument of
/// `new TextDecoder(label)`) to one of these values.
pub const EncodingLabel = enum {
    @"UTF-8",
    IBM866,
    @"ISO-8859-2",
    @"ISO-8859-3",
    @"ISO-8859-4",
    @"ISO-8859-5",
    @"ISO-8859-6",
    @"ISO-8859-7",
    @"ISO-8859-8",
    @"ISO-8859-8-I",
    @"ISO-8859-10",
    @"ISO-8859-13",
    @"ISO-8859-14",
    @"ISO-8859-15",
    @"ISO-8859-16",
    @"KOI8-R",
    @"KOI8-U",
    macintosh,
    @"windows-874",
    @"windows-1250",
    @"windows-1251",
    /// Also known as
    /// - ASCII
    /// - latin1
    @"windows-1252",
    @"windows-1253",
    @"windows-1254",
    @"windows-1255",
    @"windows-1256",
    @"windows-1257",
    @"windows-1258",
    @"x-mac-cyrillic",
    Big5,
    @"EUC-JP",
    @"ISO-2022-JP",
    Shift_JIS,
    @"EUC-KR",
    @"UTF-16BE",
    @"UTF-16LE",
    @"x-user-defined",

    /// Maps every `EncodingLabel` to a string.
    pub const Map = std.enums.EnumMap(EncodingLabel, string);

    /// Lowercase name for each encoding. Every entry starts out as the empty
    /// string; only UTF-8, UTF-16LE and windows-1252 are given a name here.
    pub const label: Map = brk: {
        var map = Map.initFull("");
        map.put(EncodingLabel.@"UTF-8", "utf-8");
        map.put(EncodingLabel.@"UTF-16LE", "utf-16le");
        map.put(EncodingLabel.@"windows-1252", "windows-1252");
        break :brk map;
    };

    // The three lists below enumerate the labels `which` accepts for
    // UTF-16LE, UTF-8 and windows-1252 respectively, sorted by length.
    const utf16_names = [_]string{
        "ucs-2",
        "utf-16",
        "unicode",
        "utf-16le",
        "csunicode",
        "unicodefeff",
        "iso-10646-ucs-2",
    };

    const utf8_names = [_]string{
        "utf8",
        "utf-8",
        "unicode11utf8",
        "unicode20utf8",
        "x-unicode20utf8",
        "unicode-1-1-utf-8",
    };

    const latin1_names = [_]string{
        "l1",
        "ascii",
        "cp819",
        "cp1252",
        "ibm819",
        "latin1",
        "iso88591",
        "us-ascii",
        "x-cp1252",
        "iso8859-1",
        "iso_8859-1",
        "iso-8859-1",
        "iso-ir-100",
        "csisolatin1",
        "windows-1252",
        "ansi_x3.4-1968",
        "iso_8859-1:1987",
    };

    /// Alias for the encoding that all "latin1" / "ascii" style labels resolve to.
    pub const latin1 = EncodingLabel.@"windows-1252";

    /// Resolves an encoding label to an `EncodingLabel`.
    ///
    /// Leading and trailing ASCII whitespace (tab, line feed, form feed,
    /// carriage return, space) is stripped and the comparison is ASCII
    /// case-insensitive. Only labels for UTF-8, UTF-16LE, UTF-16BE and
    /// windows-1252 are recognized; anything else returns `null`.
    pub fn which(input_: string) ?EncodingLabel {
        // The WHATWG definition of ASCII whitespace includes form feed (U+000C).
        const input = strings.trim(input_, " \t\r\n\x0C");
        const ExactMatcher = strings.ExactSizeMatcher;
        const Eight = ExactMatcher(8);
        const Sixteen = ExactMatcher(16);

        // Dispatch on length so each label is compared with a fixed-size matcher.
        return switch (input.len) {
            // No recognized label is shorter than two bytes.
            0, 1 => null,

            2...8 => switch (Eight.matchLower(input)) {
                Eight.case("l1"),
                Eight.case("ascii"),
                Eight.case("cp819"),
                Eight.case("cp1252"),
                Eight.case("ibm819"),
                Eight.case("latin1"),
                Eight.case("iso88591"),
                Eight.case("us-ascii"),
                Eight.case("x-cp1252"),
                => EncodingLabel.latin1,

                Eight.case("ucs-2"),
                Eight.case("utf-16"),
                Eight.case("unicode"),
                Eight.case("utf-16le"),
                => EncodingLabel.@"UTF-16LE",

                Eight.case("utf-16be"),
                => EncodingLabel.@"UTF-16BE",

                Eight.case("utf8"),
                Eight.case("utf-8"),
                => EncodingLabel.@"UTF-8",

                else => null,
            },

            9...16 => switch (Sixteen.matchLower(input)) {
                Sixteen.case("iso8859-1"),
                Sixteen.case("iso_8859-1"),
                Sixteen.case("iso-8859-1"),
                Sixteen.case("iso-ir-100"),
                Sixteen.case("csisolatin1"),
                Sixteen.case("windows-1252"),
                Sixteen.case("ansi_x3.4-1968"),
                Sixteen.case("iso_8859-1:1987"),
                => EncodingLabel.latin1,

                Sixteen.case("unicode11utf8"),
                Sixteen.case("unicode20utf8"),
                Sixteen.case("x-unicode20utf8"),
                => EncodingLabel.@"UTF-8",

                Sixteen.case("csunicode"),
                Sixteen.case("unicodefeff"),
                Sixteen.case("iso-10646-ucs-2"),
                => EncodingLabel.@"UTF-16LE",

                // "unicodefffe" is the other UTF-16BE label in the Encoding Standard.
                Sixteen.case("unicodefffe"),
                => EncodingLabel.@"UTF-16BE",

                else => null,
            },

            // The only recognized label longer than 16 bytes.
            else => if (strings.eqlCaseInsensitiveASCII(input, "unicode-1-1-utf-8", true))
                EncodingLabel.@"UTF-8"
            else
                null,
        };
    }
};
|
||||
const std = @import("std");
|
||||
const bun = @import("root").bun;
|
||||
const encoding = @import("encoding.zig");
|
||||
const string = []const u8;
|
||||
const strings = bun.strings;
|
||||
Reference in New Issue
Block a user