mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
This commit is contained in:
@@ -3815,15 +3815,17 @@ pub const Blob = struct {
|
||||
return this.store != null and this.store.?.data == .file;
|
||||
}
|
||||
|
||||
pub fn toStringWithBytes(this: *Blob, global: *JSGlobalObject, buf: []const u8, comptime lifetime: Lifetime) JSValue {
|
||||
pub fn toStringWithBytes(this: *Blob, global: *JSGlobalObject, buf_: []const u8, comptime lifetime: Lifetime) JSValue {
|
||||
// null == unknown
|
||||
// false == can't be
|
||||
const could_be_all_ascii = this.is_all_ascii orelse this.store.?.is_all_ascii;
|
||||
|
||||
const buf = strings.withoutUTF8BOM(buf_);
|
||||
|
||||
if (could_be_all_ascii == null or !could_be_all_ascii.?) {
|
||||
// if toUTF16Alloc returns null, it means there are no non-ASCII characters
|
||||
// instead of erroring, invalid characters will become a U+FFFD replacement character
|
||||
if (strings.toUTF16AllocAllowBOM(bun.default_allocator, buf, false, true) catch unreachable) |external| {
|
||||
if (strings.toUTF16Alloc(bun.default_allocator, buf, false) catch unreachable) |external| {
|
||||
if (lifetime != .temporary)
|
||||
this.setIsASCIIFlag(false);
|
||||
|
||||
@@ -3850,21 +3852,36 @@ pub const Blob = struct {
|
||||
// we don't need to clone
|
||||
.clone => {
|
||||
this.store.?.ref();
|
||||
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
|
||||
return ZigString.init(buf).external(global, this.store.?, Store.external);
|
||||
},
|
||||
.transfer => {
|
||||
var store = this.store.?;
|
||||
std.debug.assert(store.data == .bytes);
|
||||
this.transfer();
|
||||
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
|
||||
return ZigString.init(buf).external(global, store, Store.external);
|
||||
},
|
||||
// strings are immutable
|
||||
// sharing isn't really a thing
|
||||
.share => {
|
||||
this.store.?.ref();
|
||||
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
|
||||
return ZigString.init(buf).external(global, this.store.?, Store.external);
|
||||
},
|
||||
.temporary => {
|
||||
// if there was a UTF-8 BOM, we need to clone the buffer because
|
||||
// external doesn't support this case here yet.
|
||||
if (buf.len != buf_.len) {
|
||||
var out = bun.String.createLatin1(buf);
|
||||
defer {
|
||||
bun.default_allocator.free(buf_);
|
||||
out.deref();
|
||||
}
|
||||
|
||||
return out.toJS(global);
|
||||
}
|
||||
|
||||
return ZigString.init(buf).toExternalValue(global);
|
||||
},
|
||||
}
|
||||
@@ -3894,7 +3911,8 @@ pub const Blob = struct {
|
||||
return toJSONWithBytes(this, global, view_, lifetime);
|
||||
}
|
||||
|
||||
pub fn toJSONWithBytes(this: *Blob, global: *JSGlobalObject, buf: []const u8, comptime lifetime: Lifetime) JSValue {
|
||||
pub fn toJSONWithBytes(this: *Blob, global: *JSGlobalObject, buf_: []const u8, comptime lifetime: Lifetime) JSValue {
|
||||
const buf = strings.withoutUTF8BOM(buf_);
|
||||
if (buf.len == 0) return global.createSyntaxErrorInstance("Unexpected end of JSON input", .{});
|
||||
// null == unknown
|
||||
// false == can't be
|
||||
@@ -3905,7 +3923,7 @@ pub const Blob = struct {
|
||||
var stack_fallback = std.heap.stackFallback(4096, bun.default_allocator);
|
||||
const allocator = stack_fallback.get();
|
||||
// if toUTF16Alloc returns null, it means there are no non-ASCII characters
|
||||
if (strings.toUTF16AllocAllowBOM(allocator, buf, false, true) catch null) |external| {
|
||||
if (strings.toUTF16Alloc(allocator, buf, false) catch null) |external| {
|
||||
if (comptime lifetime != .temporary) this.setIsASCIIFlag(false);
|
||||
const result = ZigString.init16(external).toJSONObject(global);
|
||||
allocator.free(external);
|
||||
@@ -4534,11 +4552,19 @@ pub const InternalBlob = struct {
|
||||
was_string: bool = false,
|
||||
|
||||
pub fn toStringOwned(this: *@This(), globalThis: *JSC.JSGlobalObject) JSValue {
|
||||
if (strings.toUTF16AllocAllowBOM(globalThis.allocator(), this.bytes.items, false, true) catch &[_]u16{}) |out| {
|
||||
const bytes_without_bom = strings.withoutUTF8BOM(this.bytes.items);
|
||||
if (strings.toUTF16Alloc(globalThis.allocator(), bytes_without_bom, false) catch &[_]u16{}) |out| {
|
||||
const return_value = ZigString.toExternalU16(out.ptr, out.len, globalThis);
|
||||
return_value.ensureStillAlive();
|
||||
this.deinit();
|
||||
return return_value;
|
||||
} else if
|
||||
// If there was a UTF8 BOM, we clone it
|
||||
(bytes_without_bom.len != this.bytes.items.len) {
|
||||
defer this.deinit();
|
||||
var out = bun.String.createLatin1(this.bytes.items[3..]);
|
||||
defer out.deref();
|
||||
return out.toJS(globalThis);
|
||||
} else {
|
||||
var str = ZigString.init(this.toOwnedSlice());
|
||||
str.mark();
|
||||
|
||||
@@ -1313,13 +1313,9 @@ pub fn copyLatin1IntoASCII(dest: []u8, src: []const u8) void {
|
||||
|
||||
const utf8_bom = [_]u8{ 0xef, 0xbb, 0xbf };
|
||||
|
||||
pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 {
|
||||
return toUTF16AllocAllowBOM(allocator, bytes, fail_if_invalid, false);
|
||||
}
|
||||
|
||||
pub fn withoutUTF8BOM(bytes: []const u8) []const u8 {
|
||||
if (bytes.len > 3 and strings.eqlComptime(bytes[0..3], utf8_bom)) {
|
||||
return bytes[3..];
|
||||
if (strings.hasPrefixComptime(bytes, utf8_bom)) {
|
||||
return bytes[utf8_bom.len..];
|
||||
} else {
|
||||
return bytes;
|
||||
}
|
||||
@@ -1328,20 +1324,8 @@ pub fn withoutUTF8BOM(bytes: []const u8) []const u8 {
|
||||
/// Convert a UTF-8 string to a UTF-16 string IF there are any non-ascii characters
|
||||
/// If there are no non-ascii characters, this returns null
|
||||
/// This is intended to be used for strings that go to JavaScript
|
||||
pub fn toUTF16AllocAllowBOM(allocator: std.mem.Allocator, bytes_: []const u8, comptime fail_if_invalid: bool, comptime allow_bom: bool) !?[]u16 {
|
||||
var bytes = bytes_;
|
||||
pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 {
|
||||
if (strings.firstNonASCII(bytes)) |i| {
|
||||
if (comptime allow_bom) {
|
||||
// we could avoid the allocation here when it's otherwise ASCII. But
|
||||
// it gets really complicated because most memory allocators need
|
||||
// the head pointer to be the allocated one so if we instead return
|
||||
// a non-head pointer and try to free that the allocator might not
|
||||
// be able to free it, and we would have a big problem.
|
||||
if (i == 0 and bytes.len > 3 and strings.eqlComptime(bytes[0..3], utf8_bom)) {
|
||||
bytes = bytes[3..];
|
||||
}
|
||||
}
|
||||
|
||||
const output_: ?std.ArrayList(u16) = if (comptime bun.FeatureFlags.use_simdutf) simd: {
|
||||
const trimmed = bun.simdutf.trim.utf8(bytes);
|
||||
|
||||
|
||||
@@ -1,7 +1,39 @@
|
||||
import { describe, expect, it, test } from "bun:test";
|
||||
|
||||
describe("UTF-8 BOM should be ignored", () => {
|
||||
test("handles empty strings", async () => {
|
||||
const blob = new Response(new Blob([Buffer.from([0xef, 0xbb, 0xbf])]));
|
||||
|
||||
expect(await blob.text()).toHaveLength(0);
|
||||
expect(async () => await blob.json()).toThrow();
|
||||
});
|
||||
|
||||
test("handles UTF8 BOM + emoji", async () => {
|
||||
const blob = new Response(new Blob([Buffer.from([0xef, 0xbb, 0xbf]), Buffer.from("🌎")]));
|
||||
|
||||
expect(await blob.text()).toHaveLength(2);
|
||||
expect(async () => await blob.json()).toThrow();
|
||||
});
|
||||
|
||||
describe("Blob", () => {
|
||||
describe("with emoji", () => {
|
||||
it("in text()", async () => {
|
||||
const blob = new Blob(["\uFEFFHello, World! 🌎"], { type: "text/plain" });
|
||||
expect(await blob.text()).toBe("Hello, World! 🌎");
|
||||
});
|
||||
|
||||
it("in json()", async () => {
|
||||
const blob = new Blob(['\uFEFF{"hello":"World 🌎"}'], { type: "application/json" });
|
||||
expect(await blob.json()).toStrictEqual({ "hello": "World 🌎" } as any);
|
||||
});
|
||||
|
||||
it("in formData()", async () => {
|
||||
const blob = new Blob(["\uFEFFhello=world 🌎"], { type: "application/x-www-form-urlencoded" });
|
||||
const formData = await blob.formData();
|
||||
expect(formData.get("hello")).toBe("world 🌎");
|
||||
});
|
||||
});
|
||||
|
||||
it("in text()", async () => {
|
||||
const blob = new Blob(["\uFEFFHello, World!"], { type: "text/plain" });
|
||||
expect(await blob.text()).toBe("Hello, World!");
|
||||
|
||||
Reference in New Issue
Block a user