// Source: bun.sh/src/bun.js/webcore/TextEncoder.zig (256 lines, 9.7 KiB, Zig)

/// Encodes `len` Latin-1 bytes starting at `ptr` as UTF-8 and returns the
/// result as a Uint8Array.
///
/// Small inputs are transcoded into a stack buffer and then copied into a
/// Uint8Array allocated by JSC, so JSC owns the memory and no reallocation is
/// needed. Larger inputs are transcoded into a heap allocation that is handed
/// to an ArrayBuffer instead.
///
/// Returns `.zero` if the freshly created array exposes no array buffer, and
/// the value of `throwOutOfMemoryValue()` if the heap allocation fails.
pub export fn TextEncoder__encode8(
    globalThis: *JSGlobalObject,
    ptr: [*]const u8,
    len: usize,
) JSValue {
    var stack_buf: [2048]u8 = undefined;
    const latin1 = ptr[0..len];

    // A Latin-1 byte needs at most two UTF-8 bytes, so anything longer than
    // half the stack buffer might not fit and goes through the allocator.
    if (latin1.len > stack_buf.len / 2) {
        const utf8 = strings.allocateLatin1IntoUTF8(globalThis.bunVM().allocator, []const u8, latin1) catch {
            return globalThis.throwOutOfMemoryValue();
        };
        bun.assert(utf8.len >= latin1.len);
        return ArrayBuffer.fromBytes(utf8, .Uint8Array).toJSUnchecked(globalThis, null);
    }

    // Small input: transcode on the stack first, then size the JSC-owned
    // array to exactly the number of bytes written.
    const encoded = strings.copyLatin1IntoUTF8(&stack_buf, []const u8, latin1);
    const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, encoded.written);
    bun.assert(encoded.written <= stack_buf.len);
    bun.assert(encoded.read == latin1.len);

    const backing = uint8array.asArrayBuffer(globalThis) orelse return .zero;
    bun.assert(encoded.written == backing.len);
    @memcpy(backing.byteSlice()[0..encoded.written], stack_buf[0..encoded.written]);
    return uint8array;
}
/// Encodes `len` UTF-16 code units starting at `ptr` as UTF-8 and returns the
/// result as a Uint8Array.
///
/// Small inputs are transcoded into a stack buffer and then copied into a
/// Uint8Array allocated by JSC; larger inputs are transcoded into a heap
/// allocation that is handed to an ArrayBuffer.
///
/// Returns `.zero` if a freshly created array exposes no array buffer, and the
/// value of `throwOutOfMemoryValue()` if the heap allocation fails.
pub export fn TextEncoder__encode16(
    globalThis: *JSGlobalObject,
    ptr: [*]const u16,
    len: usize,
) JSValue {
    var buf: [2048]u8 = undefined;
    const slice = ptr[0..len];

    // One UTF-16 code unit never needs more than 3 UTF-8 bytes, so inputs of
    // up to buf.len / 4 code units are guaranteed to fit in the stack buffer.
    if (slice.len <= buf.len / 4) {
        const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice, true);

        // Non-empty input that produced nothing is emitted as U+FFFD.
        // Empty input must fall through and yield an empty array instead of
        // a replacement character.
        if (slice.len > 0 and (result.read == 0 or result.written == 0)) {
            const replacement_char = [_]u8{ 239, 191, 189 }; // U+FFFD in UTF-8
            const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, replacement_char.len);
            // Same failure handling as TextEncoder__encode8: no backing buffer -> `.zero`.
            const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
            @memcpy(array_buffer.slice()[0..replacement_char.len], &replacement_char);
            return uint8array;
        }

        const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
        bun.assert(result.written <= buf.len);
        bun.assert(result.read == slice.len);
        const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
        bun.assert(result.written == array_buffer.len);
        @memcpy(array_buffer.slice()[0..result.written], buf[0..result.written]);
        return uint8array;
    }

    const bytes = strings.toUTF8AllocWithType(
        bun.default_allocator,
        @TypeOf(slice),
        slice,
    ) catch {
        // Report allocation failure the same way the sibling encoders do,
        // rather than as an "invalid arguments" value.
        return globalThis.throwOutOfMemoryValue();
    };
    return ArrayBuffer.fromBytes(bytes, .Uint8Array).toJSUnchecked(globalThis, null);
}
/// Encodes `len` UTF-16 code units starting at `ptr` as UTF-8 and returns the
/// result as a Uint8Array.
///
/// Small inputs are transcoded into a stack buffer and then copied into a
/// Uint8Array allocated by JSC; larger inputs are transcoded into a heap
/// allocation that is handed to an ArrayBuffer.
///
/// Returns `.zero` if a freshly created array exposes no array buffer, and the
/// value of `throwOutOfMemoryValue()` if the heap allocation fails.
///
/// NOTE(review): this mirrors TextEncoder__encode16 and the exported symbol
/// name `c` looks accidental — confirm whether anything links against it
/// before renaming or removing it.
pub export fn c(
    globalThis: *JSGlobalObject,
    ptr: [*]const u16,
    len: usize,
) JSValue {
    var buf: [2048]u8 = undefined;
    const slice = ptr[0..len];

    // One UTF-16 code unit never needs more than 3 UTF-8 bytes, so inputs of
    // up to buf.len / 4 code units are guaranteed to fit in the stack buffer.
    if (slice.len <= buf.len / 4) {
        const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice, true);

        // Non-empty input that produced nothing is emitted as U+FFFD.
        // Empty input must fall through and yield an empty array instead of
        // a replacement character.
        if (slice.len > 0 and (result.read == 0 or result.written == 0)) {
            const replacement_char = [_]u8{ 239, 191, 189 }; // U+FFFD in UTF-8
            const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, replacement_char.len);
            // Same failure handling as TextEncoder__encode8: no backing buffer -> `.zero`.
            const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
            @memcpy(array_buffer.slice()[0..replacement_char.len], &replacement_char);
            return uint8array;
        }

        const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
        bun.assert(result.written <= buf.len);
        bun.assert(result.read == slice.len);
        const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
        bun.assert(result.written == array_buffer.len);
        @memcpy(array_buffer.slice()[0..result.written], buf[0..result.written]);
        return uint8array;
    }

    const bytes = strings.toUTF8AllocWithType(
        bun.default_allocator,
        @TypeOf(slice),
        slice,
    ) catch {
        return globalThis.throwOutOfMemoryValue();
    };
    return ArrayBuffer.fromBytes(bytes, .Uint8Array).toJSUnchecked(globalThis, null);
}
/// Iterator state for copying the segments of a rope string directly into a
/// byte buffer, avoiding an extra temporary string allocation.
///
/// Only ASCII data is accepted: the first non-ASCII 8-bit segment, or any
/// 16-bit segment, sets `any_non_ascii` and asks the iterator to stop.
const RopeStringEncoder = struct {
    globalThis: *JSGlobalObject,
    /// Destination the segments are copied into.
    buf: []u8,
    /// Number of bytes written so far by `append8`.
    tail: usize = 0,
    /// Set once a segment that cannot be copied as plain ASCII is seen.
    any_non_ascii: bool = false,

    /// Value `copyLatin1IntoUTF8StopOnNonASCII` is checked against in both
    /// `read` and `written` to detect that it stopped on a non-ASCII byte.
    const stopped_on_non_ascii = std.math.maxInt(u32);

    /// Recovers the encoder from the iterator's opaque `data` pointer.
    fn fromIterator(it: *JSC.JSString.Iterator) *RopeStringEncoder {
        return bun.cast(*RopeStringEncoder, it.data.?);
    }

    /// Records that the rope is not pure ASCII and halts the iteration.
    fn giveUp(it: *JSC.JSString.Iterator) void {
        fromIterator(it).any_non_ascii = true;
        it.stop = 1;
    }

    /// Copies an 8-bit segment to the current end of the buffer.
    pub fn append8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32) callconv(.C) void {
        const encoder = fromIterator(it);
        const copied = strings.copyLatin1IntoUTF8StopOnNonASCII(encoder.buf[encoder.tail..], []const u8, ptr[0..len], true);
        if (copied.read == stopped_on_non_ascii and copied.written == stopped_on_non_ascii) {
            giveUp(it);
            return;
        }
        encoder.tail += copied.written;
    }

    /// 16-bit segments are never handled by this fast path.
    pub fn append16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32) callconv(.C) void {
        giveUp(it);
    }

    /// Copies an 8-bit segment to an explicit `offset` in the buffer; `tail`
    /// is left untouched.
    pub fn write8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32, offset: u32) callconv(.C) void {
        const encoder = fromIterator(it);
        const copied = strings.copyLatin1IntoUTF8StopOnNonASCII(encoder.buf[offset..], []const u8, ptr[0..len], true);
        if (copied.read == stopped_on_non_ascii and copied.written == stopped_on_non_ascii) {
            giveUp(it);
        }
    }

    /// 16-bit segments are never handled by this fast path.
    pub fn write16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32, _: u32) callconv(.C) void {
        giveUp(it);
    }

    /// Builds the iterator whose callbacks feed this encoder.
    pub fn iter(this: *RopeStringEncoder) JSC.JSString.Iterator {
        return .{
            .data = this,
            .stop = 0,
            .append8 = append8,
            .append16 = append16,
            .write8 = write8,
            .write16 = write16,
        };
    }
};
// Fast path: copies an 8-bit rope string into a Uint8Array through
// RopeStringEncoder instead of encoding an already-flattened string.
// It is only suitable for ASCII strings:
// - It's not suitable for UTF-16 strings, because getting the byteLength is unpredictable.
// - It also isn't usable for Latin-1 strings which contain non-ASCII characters;
//   the output array is sized to the string's length, which only matches the
//   UTF-8 byte length when every character is ASCII.
// Returns the filled Uint8Array, or `.undefined` when the encoder saw non-ASCII data.
// NOTE(review): callers presumably fall back to another encoder on `.undefined` — confirm at the call site.
pub export fn TextEncoder__encodeRopeString(
globalThis: *JSGlobalObject,
rope_str: *JSC.JSString,
) JSValue {
// Only checked in builds where assertions are enabled.
if (comptime Environment.allow_assert) bun.assert(rope_str.is8Bit());
var stack_buf: [2048]u8 = undefined;
var buf_to_use: []u8 = &stack_buf;
const length = rope_str.length();
// `.zero` means "no Uint8Array allocated yet"; it is checked again after iteration.
var array: JSValue = .zero;
// Longer strings are written straight into the Uint8Array's own storage;
// shorter ones go through the stack buffer and are copied afterwards.
// NOTE(review): the threshold is half the stack buffer even though ASCII needs
// one byte per character — presumably mirrors TextEncoder__encode8; confirm.
if (length > stack_buf.len / 2) {
array = JSC.JSValue.createUninitializedUint8Array(globalThis, length);
array.ensureStillAlive();
buf_to_use = array.asArrayBuffer(globalThis).?.slice();
}
var encoder = RopeStringEncoder{
.globalThis = globalThis,
.buf = buf_to_use,
};
var iter = encoder.iter();
// NOTE(review): the ensureStillAlive calls around the iteration presumably keep
// `array` visible to the GC while its storage is being written — confirm.
array.ensureStillAlive();
rope_str.iterator(globalThis, &iter);
array.ensureStillAlive();
// The encoder stops at the first non-ASCII or 16-bit segment; nothing usable was produced.
if (encoder.any_non_ascii) {
return .undefined;
}
// Stack-buffer path: allocate the result now and copy the encoded bytes into it.
if (array == .zero) {
array = JSC.JSValue.createUninitializedUint8Array(globalThis, length);
array.ensureStillAlive();
@memcpy(array.asArrayBuffer(globalThis).?.ptr[0..length], buf_to_use[0..length]);
}
return array;
}
/// Encodes UTF-16 input as UTF-8 directly into a caller-supplied buffer.
///
/// `input_ptr`/`input_len` describe the UTF-16 code units to encode and
/// `buf_ptr`/`buf_len` the destination bytes.
///
/// Returns two `u32` counters packed into a `u64`: the number of code units
/// read, followed by the number of bytes written.
pub export fn TextEncoder__encodeInto16(
    input_ptr: [*]const u16,
    input_len: usize,
    buf_ptr: [*]u8,
    buf_len: usize,
) u64 {
    const output = buf_ptr[0..buf_len];
    const input = input_ptr[0..input_len];
    var result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, false);

    // When non-empty input made no progress and the destination can hold it,
    // emit U+FFFD and report one code unit consumed. Empty input is excluded:
    // it must report 0 read / 0 written, never more units than were supplied.
    if (input.len > 0 and output.len >= 3 and (result.read == 0 or result.written == 0)) {
        const replacement_char = [_]u8{ 239, 191, 189 }; // U+FFFD in UTF-8
        @memcpy(output[0..replacement_char.len], &replacement_char);
        result.read = 1;
        result.written = 3;
    }

    const sized: [2]u32 = .{ result.read, result.written };
    return @bitCast(sized);
}
/// Encodes Latin-1 input as UTF-8 directly into a caller-supplied buffer.
///
/// `input_ptr`/`input_len` describe the Latin-1 bytes to encode and
/// `buf_ptr`/`buf_len` the destination bytes.
///
/// Returns two `u32` counters packed into a `u64`: the number of input bytes
/// read, followed by the number of bytes written.
pub export fn TextEncoder__encodeInto8(
    input_ptr: [*]const u8,
    input_len: usize,
    buf_ptr: [*]u8,
    buf_len: usize,
) u64 {
    const counts: strings.EncodeIntoResult = strings.copyLatin1IntoUTF8(
        buf_ptr[0..buf_len],
        []const u8,
        input_ptr[0..input_len],
    );
    // Pack { read, written } into a single integer for the caller.
    const packed_counts: [2]u32 = .{ counts.read, counts.written };
    return @bitCast(packed_counts);
}
const std = @import("std");
const bun = @import("root").bun;
const strings = bun.strings;
const JSC = bun.JSC;
const Environment = bun.Environment;
const JSGlobalObject = JSC.JSGlobalObject;
const JSValue = JSC.JSValue;
const ArrayBuffer = JSC.ArrayBuffer;
const TextEncoder = @This();