mirror of
https://github.com/oven-sh/bun
synced 2026-02-17 06:12:08 +00:00
256 lines
9.7 KiB
Zig
256 lines
9.7 KiB
Zig
// Transcodes a Latin-1 (8-bit) string to UTF-8 and returns the bytes as a
// Uint8Array.
//
// `ptr`/`len` describe the input bytes. Returns `.zero` when the array buffer
// of the freshly created Uint8Array cannot be obtained, and the value of
// `throwOutOfMemoryValue()` when the heap transcode fails.
pub export fn TextEncoder__encode8(
    globalThis: *JSGlobalObject,
    ptr: [*]const u8,
    len: usize,
) JSValue {
    // Design notes from the original author:
    // - prefer letting JSC own the memory; its code is more battle-tested
    //   than bun's, so small inputs are transcoded on the stack and then
    //   copied into JSC memory
    // - JSC will GC Uint8Arrays that occupy less than 512 bytes, so this is
    //   extra good for that case
    // - small strings never trigger a reallocation
    var scratch: [2048]u8 = undefined;
    const latin1 = ptr[0..len];

    // Anything larger than half the scratch buffer goes straight to the heap.
    if (latin1.len > scratch.len / 2) {
        const utf8 = strings.allocateLatin1IntoUTF8(globalThis.bunVM().allocator, []const u8, latin1) catch {
            return globalThis.throwOutOfMemoryValue();
        };
        bun.assert(utf8.len >= latin1.len);
        return ArrayBuffer.fromBytes(utf8, .Uint8Array).toJSUnchecked(globalThis, null);
    }

    // Small input: transcode into the stack buffer first, then copy the
    // exact number of bytes written into a JSC-owned Uint8Array.
    const encoded = strings.copyLatin1IntoUTF8(&scratch, []const u8, latin1);
    const typed_array = JSC.JSValue.createUninitializedUint8Array(globalThis, encoded.written);
    bun.assert(encoded.written <= scratch.len);
    bun.assert(encoded.read == latin1.len);

    const backing = typed_array.asArrayBuffer(globalThis) orelse return .zero;
    bun.assert(encoded.written == backing.len);
    @memcpy(backing.byteSlice()[0..encoded.written], scratch[0..encoded.written]);
    return typed_array;
}
|
|
// Transcodes a UTF-16 string to UTF-8 and returns the bytes as a Uint8Array.
//
// `ptr`/`len` describe the UTF-16 code units. Returns `.zero` when the array
// buffer of a freshly created Uint8Array cannot be obtained, and the value of
// `throwOutOfMemoryValue()` when the heap transcode fails (the same handling
// TextEncoder__encode8 uses for both situations).
pub export fn TextEncoder__encode16(
    globalThis: *JSGlobalObject,
    ptr: [*]const u16,
    len: usize,
) JSValue {
    // as much as possible, rely on JSC to own the memory
    // their code is more battle-tested than bun's code
    // so we do a stack allocation here
    // and then copy into JSC memory
    // unless it's huge
    // JSC will GC Uint8Array that occupy less than 512 bytes
    // so it's extra good for that case
    // this also means there won't be reallocations for small strings
    var buf: [2048]u8 = undefined;

    const slice = ptr[0..len];

    // max utf16 -> utf8 length
    if (slice.len <= buf.len / 4) {
        const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice, true);
        if (result.read == 0 or result.written == 0) {
            // Nothing was consumed or produced: answer with the UTF-8
            // encoding of U+FFFD (EF BF BD).
            // NOTE(review): a zero-length input would also land here;
            // presumably callers never pass one — confirm.
            const replacement_char = [_]u8{ 239, 191, 189 };
            const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, replacement_char.len);
            // Do not force-unwrap: bail out with .zero like TextEncoder__encode8.
            const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
            @memcpy(array_buffer.slice()[0..replacement_char.len], &replacement_char);
            return uint8array;
        }

        const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
        bun.assert(result.written <= buf.len);
        bun.assert(result.read == slice.len);
        const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
        bun.assert(result.written == array_buffer.len);
        @memcpy(array_buffer.slice()[0..result.written], buf[0..result.written]);
        return uint8array;
    } else {
        const bytes = strings.toUTF8AllocWithType(
            bun.default_allocator,
            @TypeOf(slice),
            slice,
        ) catch {
            // Report allocation failure the same way the sibling encoders do.
            return globalThis.throwOutOfMemoryValue();
        };
        return ArrayBuffer.fromBytes(bytes, .Uint8Array).toJSUnchecked(globalThis, null);
    }
}
|
|
|
|
// Transcodes a UTF-16 string to UTF-8 and returns the bytes as a Uint8Array.
//
// `ptr`/`len` describe the UTF-16 code units. Returns `.zero` when the array
// buffer of a freshly created Uint8Array cannot be obtained, and the value of
// `throwOutOfMemoryValue()` when the heap transcode fails.
pub export fn c(
    globalThis: *JSGlobalObject,
    ptr: [*]const u16,
    len: usize,
) JSValue {
    // as much as possible, rely on JSC to own the memory
    // their code is more battle-tested than bun's code
    // so we do a stack allocation here
    // and then copy into JSC memory
    // unless it's huge
    // JSC will GC Uint8Array that occupy less than 512 bytes
    // so it's extra good for that case
    // this also means there won't be reallocations for small strings
    var buf: [2048]u8 = undefined;

    const slice = ptr[0..len];

    // max utf16 -> utf8 length
    if (slice.len <= buf.len / 4) {
        const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice, true);
        if (result.read == 0 or result.written == 0) {
            // Nothing was consumed or produced: answer with the UTF-8
            // encoding of U+FFFD (EF BF BD).
            const replacement_char = [_]u8{ 239, 191, 189 };
            const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, replacement_char.len);
            // Avoid a forced unwrap; a missing array buffer yields .zero
            // instead of a panic / undefined behavior.
            const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
            @memcpy(array_buffer.slice()[0..replacement_char.len], &replacement_char);
            return uint8array;
        }

        const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
        bun.assert(result.written <= buf.len);
        bun.assert(result.read == slice.len);
        const array_buffer = uint8array.asArrayBuffer(globalThis) orelse return .zero;
        bun.assert(result.written == array_buffer.len);
        @memcpy(array_buffer.slice()[0..result.written], buf[0..result.written]);
        return uint8array;
    } else {
        const bytes = strings.toUTF8AllocWithType(
            bun.default_allocator,
            @TypeOf(slice),
            slice,
        ) catch {
            return globalThis.throwOutOfMemoryValue();
        };
        return ArrayBuffer.fromBytes(bytes, .Uint8Array).toJSUnchecked(globalThis, null);
    }
}
|
|
|
|
// Fast path for copying a rope string into a Uint8Array without first
// allocating a temporary flattened string.
//
// The struct is handed to a `JSC.JSString.Iterator` as its `data` pointer; the
// callbacks below copy 8-bit chunks into `buf` and give up (setting
// `any_non_ascii` and `it.stop`) as soon as non-ASCII or 16-bit data shows up.
const RopeStringEncoder = struct {
    globalThis: *JSGlobalObject,
    // Destination bytes.
    buf: []u8,
    // Number of bytes appended so far through `append8`.
    tail: usize = 0,
    // Set once the fast path has been abandoned.
    any_non_ascii: bool = false,

    // `read` and `written` both equal to this value is how
    // copyLatin1IntoUTF8StopOnNonASCII's result is recognised as "stopped".
    const stopped_marker = std.math.maxInt(u32);

    // Recovers the encoder stored in the iterator's `data` pointer.
    fn fromIterator(it: *JSC.JSString.Iterator) *RopeStringEncoder {
        return bun.cast(*RopeStringEncoder, it.data.?);
    }

    // Flags the string as unsuitable for the fast path and halts iteration.
    fn giveUp(encoder: *RopeStringEncoder, it: *JSC.JSString.Iterator) void {
        encoder.any_non_ascii = true;
        it.stop = 1;
    }

    // Appends an 8-bit chunk at the current tail, advancing the tail by the
    // number of bytes written unless the copy stopped.
    pub fn append8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32) callconv(.C) void {
        const encoder = fromIterator(it);
        const copied = strings.copyLatin1IntoUTF8StopOnNonASCII(encoder.buf[encoder.tail..], []const u8, ptr[0..len], true);
        const stopped = copied.read == stopped_marker and copied.written == stopped_marker;
        if (stopped) {
            encoder.giveUp(it);
            return;
        }
        encoder.tail += copied.written;
    }

    // 16-bit chunks are never handled by this fast path.
    pub fn append16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32) callconv(.C) void {
        fromIterator(it).giveUp(it);
    }

    // Writes an 8-bit chunk at an explicit `offset` into the buffer; the tail
    // is left untouched.
    pub fn write8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32, offset: u32) callconv(.C) void {
        const encoder = fromIterator(it);
        const copied = strings.copyLatin1IntoUTF8StopOnNonASCII(encoder.buf[offset..], []const u8, ptr[0..len], true);
        const stopped = copied.read == stopped_marker and copied.written == stopped_marker;
        if (stopped) encoder.giveUp(it);
    }

    // 16-bit chunks are never handled by this fast path.
    pub fn write16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32, _: u32) callconv(.C) void {
        fromIterator(it).giveUp(it);
    }

    // Builds the iterator whose callbacks feed this encoder.
    pub fn iter(this: *RopeStringEncoder) JSC.JSString.Iterator {
        return .{
            .data = this,
            .stop = 0,
            .append8 = append8,
            .append16 = append16,
            .write8 = write8,
            .write16 = write16,
        };
    }
};
|
|
|
|
// Fast path that encodes an 8-bit rope string into a Uint8Array.
//
// Only suitable for ASCII strings:
// - not for UTF-16 strings, because getting the byteLength is unpredictable
// - not for latin1 strings which contain non-ascii characters
//
// Returns `.undefined` when the encoder reports non-ASCII content; otherwise
// returns the Uint8Array holding `rope_str.length()` bytes.
pub export fn TextEncoder__encodeRopeString(
    globalThis: *JSGlobalObject,
    rope_str: *JSC.JSString,
) JSValue {
    if (comptime Environment.allow_assert) bun.assert(rope_str.is8Bit());

    var stack_buf: [2048]u8 = undefined;
    const length = rope_str.length();

    // Strings longer than half the stack buffer are written directly into
    // the Uint8Array's storage; shorter ones go through the stack buffer.
    var array: JSValue = .zero;
    var destination: []u8 = &stack_buf;
    if (length > stack_buf.len / 2) {
        array = JSC.JSValue.createUninitializedUint8Array(globalThis, length);
        array.ensureStillAlive();
        destination = array.asArrayBuffer(globalThis).?.slice();
    }

    var encoder = RopeStringEncoder{
        .globalThis = globalThis,
        .buf = destination,
    };
    var iter = encoder.iter();

    // Keep the array alive across the iteration that writes into it.
    array.ensureStillAlive();
    rope_str.iterator(globalThis, &iter);
    array.ensureStillAlive();

    if (encoder.any_non_ascii) {
        return .undefined;
    }

    // The array already exists when its storage was used as the destination.
    if (array != .zero) {
        return array;
    }

    // Stack-buffer case: create the array now and copy the bytes over.
    array = JSC.JSValue.createUninitializedUint8Array(globalThis, length);
    array.ensureStillAlive();
    @memcpy(array.asArrayBuffer(globalThis).?.ptr[0..length], destination[0..length]);
    return array;
}
|
|
|
|
// Encodes UTF-16 input into a caller-provided byte buffer.
//
// `input_ptr`/`input_len` describe the UTF-16 code units and
// `buf_ptr`/`buf_len` the destination bytes. The return value is a `[2]u32`
// of `{ read, written }` bit-cast to `u64`.
pub export fn TextEncoder__encodeInto16(
    input_ptr: [*]const u16,
    input_len: usize,
    buf_ptr: [*]u8,
    buf_len: usize,
) u64 {
    const output = buf_ptr[0..buf_len];
    const input = input_ptr[0..input_len];
    var result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, false);

    // When no progress was made and there is room, emit the UTF-8 encoding of
    // U+FFFD (EF BF BD) and report one code unit consumed. The `input.len > 0`
    // guard keeps a zero-length input from reporting `read = 1`, which would
    // exceed the input length.
    const no_progress = result.read == 0 or result.written == 0;
    if (input.len > 0 and output.len >= 3 and no_progress) {
        const replacement_char = [_]u8{ 239, 191, 189 };
        @memcpy(buf_ptr[0..replacement_char.len], &replacement_char);
        result.read = 1;
        result.written = 3;
    }

    const sized: [2]u32 = .{ result.read, result.written };
    return @bitCast(sized);
}
|
|
|
|
// Encodes Latin-1 input into a caller-provided byte buffer.
//
// `input_ptr`/`input_len` describe the source bytes and `buf_ptr`/`buf_len`
// the destination bytes. The return value is a `[2]u32` of
// `{ read, written }` bit-cast to `u64`.
pub export fn TextEncoder__encodeInto8(
    input_ptr: [*]const u8,
    input_len: usize,
    buf_ptr: [*]u8,
    buf_len: usize,
) u64 {
    const destination = buf_ptr[0..buf_len];
    const source = input_ptr[0..input_len];
    const outcome: strings.EncodeIntoResult = strings.copyLatin1IntoUTF8(destination, []const u8, source);

    // Pack both counters into one 64-bit value for the caller.
    const counts = [2]u32{ outcome.read, outcome.written };
    return @bitCast(counts);
}
|
|
|
|
const std = @import("std");
|
|
const bun = @import("root").bun;
|
|
const strings = bun.strings;
|
|
const JSC = bun.JSC;
|
|
const Environment = bun.Environment;
|
|
const JSGlobalObject = JSC.JSGlobalObject;
|
|
const JSValue = JSC.JSValue;
|
|
const ArrayBuffer = JSC.ArrayBuffer;
|
|
const TextEncoder = @This();
|