// used for utf8 decoding
// Holds bytes left over from a previous decode call (see `decodeSlice`).
buffered: struct {
    // Backing storage; only the first `len` bytes are meaningful.
    buf: [3]u8 = [_]u8{0} ** 3,
    // Number of valid bytes in `buf` (0 through 3).
    len: u2 = 0,

    // Returns a view of the bytes that are currently buffered.
    pub fn slice(this: *@This()) []const u8 {
        const count: usize = this.len;
        return this.buf[0..count];
    }
} = .{},

// used for utf16 decoding
// A trailing odd byte from the previous chunk; `decodeUTF16` pairs it with
// the first byte of the next chunk.
lead_byte: ?u8 = null,
// A lead surrogate that is still waiting for its trail surrogate; consumed
// by `processCodeUnitUTF16`.
lead_surrogate: ?u16 = null,

// When false, `decodeSlice` strips a leading byte-order mark from the input.
ignore_bom: bool = false,
// When true, `decodeSlice` throws on invalid input instead of returning the
// decoded string.
fatal: bool = false,
// Encoding used by `decodeSlice`; set by `constructor`, UTF-8 by default.
encoding: EncodingLabel = EncodingLabel.@"UTF-8",

pub usingnamespace bun.New(TextDecoder);
pub usingnamespace JSC.Codegen.JSTextDecoder;

/// Releases this decoder by calling `destroy`.
/// Presumably invoked when the wrapping JS object is collected — TODO confirm.
pub fn finalize(this: *TextDecoder) void {
    TextDecoder.destroy(this);
}

/// Getter returning the decoder's `ignore_bom` flag as a JS boolean.
pub fn getIgnoreBOM(
    this: *TextDecoder,
    _: *JSC.JSGlobalObject,
) JSC.JSValue {
    const ignore_bom = this.ignore_bom;
    return JSC.JSValue.jsBoolean(ignore_bom);
}

/// Getter returning the decoder's `fatal` flag as a JS boolean.
pub fn getFatal(
    this: *TextDecoder,
    _: *JSC.JSGlobalObject,
) JSC.JSValue {
    const is_fatal = this.fatal;
    return JSC.JSValue.jsBoolean(is_fatal);
}

/// Getter returning the label of the decoder's encoding as a JS string.
/// The label is looked up in `EncodingLabel.label`; every encoding value is
/// expected to have an entry there (the lookup result is unwrapped with `.?`).
pub fn getEncoding(
    this: *TextDecoder,
    globalThis: *JSC.JSGlobalObject,
) JSC.JSValue {
    const label_text = EncodingLabel.label.get(this.encoding).?;
    return ZigString.init(label_text).toJS(globalThis);
}
/// Sixteen-lane vector of UTF-16 code units.
/// Uses the `@Vector` builtin: the `std.meta.Vector` helper is gone from the
/// Zig versions that accept the single-argument `@splat` used below.
const Vector16 = @Vector(16, u16);
/// Every lane holds 127, the largest ASCII code point.
const max_16_ascii: Vector16 = @splat(@as(u16, 127));

/// Feeds a single UTF-16 code unit through the surrogate-pairing state
/// machine and appends the result to `output`.
///
/// - A lead surrogate is held in `this.lead_surrogate` until the next unit.
/// - A held lead followed by a trail surrogate is emitted as the pair.
/// - A held lead followed by anything else, or a trail with no held lead,
///   produces `strings.unicode_replacement` and sets `saw_error`.
///
/// Fails only when growing `output` runs out of memory.
fn processCodeUnitUTF16(
    this: *TextDecoder,
    output: *std.ArrayListUnmanaged(u16),
    saw_error: *bool,
    code_unit: u16,
) error{OutOfMemory}!void {
    const allocator = bun.default_allocator;
    const is_trail = strings.u16IsTrail(code_unit);

    if (this.lead_surrogate) |pending_lead| {
        this.lead_surrogate = null;

        if (is_trail) {
            // Valid pair: emit both halves unchanged.
            try output.appendSlice(allocator, &.{ pending_lead, code_unit });
            return;
        }

        // The held lead never got its trail; replace it, then fall through
        // so the current unit is still handled below.
        try output.append(allocator, strings.unicode_replacement);
        saw_error.* = true;
    }

    if (strings.u16IsLead(code_unit)) {
        // Hold on to the lead until the next code unit arrives.
        this.lead_surrogate = code_unit;
    } else if (is_trail) {
        // Trail surrogate with no preceding lead.
        try output.append(allocator, strings.unicode_replacement);
        saw_error.* = true;
    } else {
        try output.append(allocator, code_unit);
    }
}

/// Combines two bytes (already widened to `u16`) into one UTF-16 code unit.
/// With `big_endian` the first byte is the high half; otherwise the second
/// byte is.
pub fn codeUnitFromBytesUTF16(
    first: u16,
    second: u16,
    comptime big_endian: bool,
) u16 {
    const high = if (comptime big_endian) first else second;
    const low = if (comptime big_endian) second else first;
    return (high << 8) | low;
}

/// Decodes one chunk of UTF-16 bytes into UTF-16 code units.
///
/// State carried between calls lives on `this`: `lead_byte` (an odd trailing
/// byte) and `lead_surrogate` (a lead surrogate awaiting its trail).
///
/// - `big_endian`: byte order of the input.
/// - `flush`: when true, any pending lead byte or lead surrogate left after
///   this chunk is dropped and a single replacement character is emitted.
///
/// Returns the decoded code units and whether any invalid sequence was seen.
/// The caller owns the returned list (allocated with `bun.default_allocator`).
/// On `error.OutOfMemory` the partially built list is freed before returning.
pub fn decodeUTF16(
    this: *TextDecoder,
    bytes: []const u8,
    comptime big_endian: bool,
    comptime flush: bool,
) error{OutOfMemory}!struct { std.ArrayListUnmanaged(u16), bool } {
    var output: std.ArrayListUnmanaged(u16) = .{};
    // Without this, a failed append further down would leak the list.
    errdefer output.deinit(bun.default_allocator);
    try output.ensureTotalCapacity(bun.default_allocator, @divFloor(bytes.len, 2));

    var remain = bytes;
    var saw_error = false;

    // Finish the code unit that was split across the previous chunk boundary.
    if (this.lead_byte) |lead_byte| {
        if (remain.len > 0) {
            this.lead_byte = null;

            try this.processCodeUnitUTF16(
                &output,
                &saw_error,
                codeUnitFromBytesUTF16(@intCast(lead_byte), @intCast(remain[0]), big_endian),
            );
            remain = remain[1..];
        }
    }

    var i: usize = 0;

    // `-| 1` saturates at zero, so an empty or single-byte remainder skips
    // the loop instead of underflowing.
    while (i < remain.len -| 1) {
        try this.processCodeUnitUTF16(
            &output,
            &saw_error,
            codeUnitFromBytesUTF16(@intCast(remain[i]), @intCast(remain[i + 1]), big_endian),
        );
        i += 2;
    }

    if (remain.len != 0 and i == remain.len - 1) {
        // Odd number of bytes: keep the last one for the next chunk.
        this.lead_byte = remain[i];
    } else {
        bun.assertWithLocation(i == remain.len, @src());
    }

    if (comptime flush) {
        // End of stream: whatever is still pending can never be completed.
        if (this.lead_byte != null or this.lead_surrogate != null) {
            this.lead_byte = null;
            this.lead_surrogate = null;
            try output.append(bun.default_allocator, strings.unicode_replacement);
            saw_error = true;
        }
    }

    return .{ output, saw_error };
}

/// JS-facing `decode(input?, options?)`.
///
/// - `input`: missing or `undefined` decodes as empty input; otherwise it
///   must be convertible via `asArrayBuffer`, else an invalid-arguments
///   error is thrown.
/// - `options.stream`: coerced to a boolean; when true the decoder is not
///   flushed at the end of this call.
pub fn decode(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSValue {
    const args = callframe.arguments_old(2).slice();

    const input: []const u8 = if (args.len == 0 or args[0].isUndefined())
        ""
    else if (args[0].asArrayBuffer(globalThis)) |array_buffer|
        array_buffer.slice()
    else
        return globalThis.throwInvalidArguments("TextDecoder.decode expects an ArrayBuffer or TypedArray", .{});

    var streaming = false;
    if (args.len > 1 and args[1].isObject()) {
        if (args[1].fastGet(globalThis, .stream)) |stream_value| {
            streaming = stream_value.coerce(bool, globalThis);
            if (globalThis.hasException()) {
                return .zero;
            }
        }
    }

    // `decodeSlice` takes `flush` at comptime, so dispatch on it explicitly.
    if (streaming) {
        return this.decodeSlice(globalThis, input, false);
    }
    return this.decodeSlice(globalThis, input, true);
}

/// Typed entry point for `decode` that receives an already-validated
/// `Uint8Array` and no options object.
///
/// With no options there is no `stream: true`, so this must behave like
/// `decode(input)`: the decoder is flushed at the end of the call. Passing
/// `flush = false` here would leave incomplete trailing bytes buffered and
/// prepend them to the next, unrelated, decode call.
pub fn decodeWithoutTypeChecks(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, uint8array: *JSC.JSUint8Array) bun.JSError!JSValue {
    return this.decodeSlice(globalThis, uint8array.slice(), true);
}

/// Decodes `buffer_slice` according to `this.encoding` and returns the result
/// as a JS string value.
///
/// `flush` is comptime. When it is false, UTF-8 leftover bytes reported by the
/// decoder are stored in `this.buffered` for the next call, and `decodeUTF16`
/// keeps its pending lead byte / lead surrogate.
///
/// Throws ERR_ENCODING_INVALID_ENCODED_DATA for invalid UTF-8/UTF-16 input
/// when `this.fatal` is set, an out-of-memory error when allocation fails,
/// and an invalid-arguments error for encodings not handled here.
fn decodeSlice(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, buffer_slice: []const u8, comptime flush: bool) bun.JSError!JSValue {
    switch (this.encoding) {
        EncodingLabel.latin1 => {
            // Pure ASCII needs no widening; convert the input bytes directly.
            if (strings.isAllASCII(buffer_slice)) {
                return ZigString.init(buffer_slice).toJS(globalThis);
            }

            // It's unintuitive that we encode Latin1 as UTF16 even though the engine natively supports Latin1 strings...
            // However, this is also what WebKit seems to do.
            //
            // It's not clear why we couldn't just use Latin1 here, but test failures proved it necessary.
            const out_length = strings.elementLengthLatin1IntoUTF16([]const u8, buffer_slice);
            const bytes = try globalThis.allocator().alloc(u16, out_length);

            const out = strings.copyLatin1IntoUTF16([]u16, bytes, []const u8, buffer_slice);
            // NOTE(review): presumably the external string takes ownership of `bytes` — confirm.
            return ZigString.toExternalU16(bytes.ptr, out.written, globalThis);
        },
        EncodingLabel.@"UTF-8" => {
            // `input` is the byte run to decode; `deinit` is true when `input`
            // was heap-allocated here and must be freed by this function.
            const input, const deinit = input: {
                // Strip a UTF-8 BOM from the front of this chunk unless `ignore_bom` is set.
                // NOTE(review): this check runs on every call and before buffered bytes are
                // prepended, so a BOM at the start of a later streamed chunk is stripped too,
                // and a BOM split across chunks is not recognized — confirm this is intended.
                const maybe_without_bom = if (!this.ignore_bom and strings.hasPrefixComptime(buffer_slice, "\xef\xbb\xbf"))
                    buffer_slice[3..]
                else
                    buffer_slice;

                if (this.buffered.len > 0) {
                    // The buffer is consumed by this call; the reset happens when this
                    // scope exits, i.e. after the copies below have used the old length.
                    defer this.buffered.len = 0;
                    // Concatenate the previously buffered bytes with the new chunk.
                    const joined = try bun.default_allocator.alloc(u8, maybe_without_bom.len + this.buffered.len);
                    @memcpy(joined[0..this.buffered.len], this.buffered.slice());
                    @memcpy(joined[this.buffered.len..][0..maybe_without_bom.len], maybe_without_bom);
                    break :input .{ joined, true };
                }

                break :input .{ maybe_without_bom, false };
            };

            // `inline else` turns the runtime `fatal` flag into the comptime
            // `fail_if_invalid` argument expected by the decoder.
            const maybe_decode_result = switch (this.fatal) {
                inline else => |fail_if_invalid| strings.toUTF16AllocMaybeBuffered(bun.default_allocator, input, fail_if_invalid, flush) catch |err| {
                    if (deinit) bun.default_allocator.free(input);
                    if (comptime fail_if_invalid) {
                        if (err == error.InvalidByteSequence) {
                            return globalThis.ERR_ENCODING_INVALID_ENCODED_DATA("Invalid byte sequence", .{}).throw();
                        }
                    }

                    // The only other error expected from the decoder is allocation failure.
                    bun.assert(err == error.OutOfMemory);
                    return globalThis.throwOutOfMemory();
                },
            };

            if (maybe_decode_result) |decode_result| {
                // The decoder produced its own UTF-16 buffer; the joined input is no longer needed.
                if (deinit) bun.default_allocator.free(input);
                const decoded, const leftover, const leftover_len = decode_result;
                bun.assert(this.buffered.len == 0);
                if (comptime !flush) {
                    // Streaming: remember the bytes the decoder could not consume yet.
                    if (leftover_len != 0) {
                        this.buffered.buf = leftover;
                        this.buffered.len = leftover_len;
                    }
                }
                // NOTE(review): presumably the external string takes ownership of `decoded` — confirm.
                return ZigString.toExternalU16(decoded.ptr, decoded.len, globalThis);
            }

            // No UTF-16 buffer was produced — presumably because the input needed no
            // conversion (e.g. all ASCII); TODO confirm against toUTF16AllocMaybeBuffered.
            // A heap-allocated `input` is not expected on this path, since it would not be freed.
            bun.debugAssert(input.len == 0 or !deinit);

            // Experiment: using mimalloc directly is slightly slower
            return ZigString.init(input).toJS(globalThis);
        },

        inline .@"UTF-16LE", .@"UTF-16BE" => |utf16_encoding| {
            // Strip the byte-order mark matching this encoding's endianness unless `ignore_bom` is set.
            const bom = if (comptime utf16_encoding == .@"UTF-16LE") "\xff\xfe" else "\xfe\xff";
            const input = if (!this.ignore_bom and strings.hasPrefixComptime(buffer_slice, bom))
                buffer_slice[2..]
            else
                buffer_slice;

            var decoded, const saw_error = try this.decodeUTF16(input, utf16_encoding == .@"UTF-16BE", flush);

            if (saw_error and this.fatal) {
                decoded.deinit(bun.default_allocator);
                return globalThis.ERR_ENCODING_INVALID_ENCODED_DATA("The encoded data was not valid {s} data", .{@tagName(utf16_encoding)}).throw();
            }

            // NOTE(review): `decoded` is not deinitialized on this path. Whether
            // `bun.String.fromUTF16(...).toJS` copies or adopts the buffer is not
            // visible here — confirm this does not leak.
            var output = bun.String.fromUTF16(decoded.items);
            return output.toJS(globalThis);
        },
        else => {
            return globalThis.throwInvalidArguments("TextDecoder.decode set to unsupported encoding", .{});
        },
    }
}

/// JS constructor: `new TextDecoder(label?, options?)`.
///
/// - `label`: a string naming a supported encoding, or `undefined` for the
///   default (UTF-8). Any other value, or an unknown label, throws.
/// - `options`: when a second argument is passed it must be an object. Its
///   `fatal` and `ignoreBOM` properties, when present, must be booleans.
///
/// Returns a newly allocated decoder created with `TextDecoder.new`.
pub fn constructor(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!*TextDecoder {
    // Neither binding is reassigned or written through, so both are `const`.
    const args_ = callframe.arguments_old(2);
    const arguments: []const JSC.JSValue = args_.ptr[0..args_.len];

    var decoder = TextDecoder{};

    if (arguments.len > 0) {
        // encoding
        if (arguments[0].isString()) {
            var str = try arguments[0].toSlice(globalThis, bun.default_allocator);
            // Runs after the error message below has been formatted, so
            // `str.slice()` is still valid there.
            defer if (str.isAllocated()) str.deinit();

            if (EncodingLabel.which(str.slice())) |label| {
                decoder.encoding = label;
            } else {
                return globalThis.throwInvalidArguments("Unsupported encoding label \"{s}\"", .{str.slice()});
            }
        } else if (arguments[0].isUndefined()) {
            // default to utf-8
            decoder.encoding = EncodingLabel.@"UTF-8";
        } else {
            return globalThis.throwInvalidArguments("TextDecoder(encoding) label is invalid", .{});
        }

        if (arguments.len >= 2) {
            const options = arguments[1];

            if (!options.isObject()) {
                return globalThis.throwInvalidArguments("TextDecoder(options) is invalid", .{});
            }

            if (try options.get(globalThis, "fatal")) |fatal| {
                if (fatal.isBoolean()) {
                    decoder.fatal = fatal.asBoolean();
                } else {
                    return globalThis.throwInvalidArguments("TextDecoder(options) fatal is invalid. Expected boolean value", .{});
                }
            }

            if (try options.get(globalThis, "ignoreBOM")) |ignoreBOM| {
                if (ignoreBOM.isBoolean()) {
                    decoder.ignore_bom = ignoreBOM.asBoolean();
                } else {
                    return globalThis.throwInvalidArguments("TextDecoder(options) ignoreBOM is invalid. Expected boolean value", .{});
                }
            }
        }
    }

    return TextDecoder.new(decoder);
}

const TextDecoder = @This();

const std = @import("std");
const bun = @import("root").bun;
const JSC = bun.JSC;
const Output = bun.Output;
const MutableString = bun.MutableString;
const strings = bun.strings;
const string = bun.string;
const FeatureFlags = bun.FeatureFlags;
const ArrayBuffer = JSC.ArrayBuffer;
const JSUint8Array = JSC.JSUint8Array;
const ZigString = JSC.ZigString;
const JSInternalPromise = JSC.JSInternalPromise;
const JSPromise = JSC.JSPromise;
const JSValue = JSC.JSValue;
const JSGlobalObject = JSC.JSGlobalObject;
const EncodingLabel = JSC.WebCore.EncodingLabel;