Move TextDecoder, TextEncoderStreamEncoder, TextEncoder, EncodingLabel into separate files (#18430)

2026-02-12 11:59:00 +00:00 · 2025-03-24 17:10:48 -07:00
parent 438d8555c6
commit 50eaea19cb
5 changed files with 998 additions and 958 deletions
--- a/src/bun.js/webcore/EncodingLabel.zig
+++ b/src/bun.js/webcore/EncodingLabel.zig
@@ -0,0 +1,160 @@
+/// Names of the encodings defined by the WHATWG Encoding Standard.
+///
+/// https://encoding.spec.whatwg.org/encodings.json
+pub const EncodingLabel = enum {
+    @"UTF-8",
+    IBM866,
+    @"ISO-8859-2",
+    @"ISO-8859-3",
+    @"ISO-8859-4",
+    @"ISO-8859-5",
+    @"ISO-8859-6",
+    @"ISO-8859-7",
+    @"ISO-8859-8",
+    @"ISO-8859-8-I",
+    @"ISO-8859-10",
+    @"ISO-8859-13",
+    @"ISO-8859-14",
+    @"ISO-8859-15",
+    @"ISO-8859-16",
+    @"KOI8-R",
+    @"KOI8-U",
+    macintosh,
+    @"windows-874",
+    @"windows-1250",
+    @"windows-1251",
+    /// Also known as
+    /// - ASCII
+    /// - latin1
+    @"windows-1252",
+    @"windows-1253",
+    @"windows-1254",
+    @"windows-1255",
+    @"windows-1256",
+    @"windows-1257",
+    @"windows-1258",
+    @"x-mac-cyrillic",
+    Big5,
+    @"EUC-JP",
+    @"ISO-2022-JP",
+    Shift_JIS,
+    @"EUC-KR",
+    @"UTF-16BE",
+    @"UTF-16LE",
+    @"x-user-defined",
+
+    /// Table type mapping every `EncodingLabel` to a string.
+    pub const Map = std.enums.EnumMap(EncodingLabel, string);
+
+    /// Lower-case label string per encoding. Only UTF-8, UTF-16LE and
+    /// windows-1252 are populated; every other key (UTF-16BE included) maps
+    /// to the empty string.
+    pub const label: Map = brk: {
+        var map = Map.initFull("");
+        map.put(EncodingLabel.@"UTF-8", "utf-8");
+        map.put(EncodingLabel.@"UTF-16LE", "utf-16le");
+        map.put(EncodingLabel.@"windows-1252", "windows-1252");
+        break :brk map;
+    };
+
+    // The three lists below enumerate the labels `which` accepts for
+    // UTF-16LE, UTF-8 and windows-1252 respectively. They are not referenced
+    // by the code visible in this file; they read as a reference for the
+    // switch arms in `which`.
+    const utf16_names = [_]string{
+        "ucs-2",
+        "utf-16",
+        "unicode",
+        "utf-16le",
+        "csunicode",
+        "unicodefeff",
+        "iso-10646-ucs-2",
+    };
+
+    const utf8_names = [_]string{
+        "utf8",
+        "utf-8",
+        "unicode11utf8",
+        "unicode20utf8",
+        "x-unicode20utf8",
+        "unicode-1-1-utf-8",
+    };
+
+    const latin1_names = [_]string{
+        "l1",
+        "ascii",
+        "cp819",
+        "cp1252",
+        "ibm819",
+        "latin1",
+        "iso88591",
+        "us-ascii",
+        "x-cp1252",
+        "iso8859-1",
+        "iso_8859-1",
+        "iso-8859-1",
+        "iso-ir-100",
+        "csisolatin1",
+        "windows-1252",
+        "ansi_x3.4-1968",
+        "iso_8859-1:1987",
+    };
+
+    /// Alias: every "latin1"/"ascii"-style label resolves to windows-1252.
+    pub const latin1 = EncodingLabel.@"windows-1252";
+
+    /// Resolve an encoding label to the encoding it names.
+    ///
+    /// Leading and trailing ASCII whitespace (space, tab, LF, FF, CR) is
+    /// stripped first, as required by the "get an encoding" algorithm of the
+    /// WHATWG Encoding Standard. Only the labels of UTF-8, UTF-16LE, UTF-16BE
+    /// and windows-1252 are recognized.
+    ///
+    /// Returns `null` when the trimmed label is not one of the recognized
+    /// labels (including when it is shorter than two bytes).
+    pub fn which(input_: string) ?EncodingLabel {
+        // "\x0C" is form feed, which the spec counts as ASCII whitespace.
+        const input = strings.trim(input_, " \t\n\x0C\r");
+        const ExactMatcher = strings.ExactSizeMatcher;
+        const Eight = ExactMatcher(8);
+        const Sixteen = ExactMatcher(16);
+        // Labels are bucketed by byte length so each bucket can use a
+        // fixed-size matcher. `matchLower` is expected to lower-case the
+        // input before comparing, which makes the match case-insensitive
+        // (NOTE(review): confirm against `strings.ExactSizeMatcher`).
+        return switch (input.len) {
+            // No recognized label is shorter than two bytes.
+            1, 0 => null,
+            2...8 => switch (Eight.matchLower(input)) {
+                Eight.case("l1"),
+                Eight.case("ascii"),
+                Eight.case("cp819"),
+                Eight.case("cp1252"),
+                Eight.case("ibm819"),
+                Eight.case("latin1"),
+                Eight.case("iso88591"),
+                Eight.case("us-ascii"),
+                Eight.case("x-cp1252"),
+                => EncodingLabel.latin1,
+
+                Eight.case("ucs-2"),
+                Eight.case("utf-16"),
+                Eight.case("unicode"),
+                Eight.case("utf-16le"),
+                => EncodingLabel.@"UTF-16LE",
+
+                Eight.case("utf-16be"),
+                => EncodingLabel.@"UTF-16BE",
+
+                Eight.case("utf8"), Eight.case("utf-8") => EncodingLabel.@"UTF-8",
+                else => null,
+            },
+
+            9...16 => switch (Sixteen.matchLower(input)) {
+                Sixteen.case("iso8859-1"),
+                Sixteen.case("iso_8859-1"),
+                Sixteen.case("iso-8859-1"),
+                Sixteen.case("iso-ir-100"),
+                Sixteen.case("csisolatin1"),
+                Sixteen.case("windows-1252"),
+                Sixteen.case("ansi_x3.4-1968"),
+                Sixteen.case("iso_8859-1:1987"),
+                => EncodingLabel.latin1,
+
+                Sixteen.case("unicode11utf8"),
+                Sixteen.case("unicode20utf8"),
+                Sixteen.case("x-unicode20utf8"),
+                => EncodingLabel.@"UTF-8",
+
+                Sixteen.case("csunicode"),
+                Sixteen.case("unicodefeff"),
+                Sixteen.case("iso-10646-ucs-2"),
+                => EncodingLabel.@"UTF-16LE",
+
+                // The spec lists "unicodefffe" as the second UTF-16BE label.
+                Sixteen.case("unicodefffe"),
+                => EncodingLabel.@"UTF-16BE",
+
+                else => null,
+            },
+            // The only recognized label longer than 16 bytes (17 bytes), so
+            // it is compared directly instead of through a matcher.
+            else => if (strings.eqlCaseInsensitiveASCII(input, "unicode-1-1-utf-8", true))
+                EncodingLabel.@"UTF-8"
+            else
+                null,
+        };
+    }
+};
+const std = @import("std");
+const bun = @import("root").bun;
+const encoding = @import("encoding.zig");
+const string = []const u8;
+const strings = bun.strings;