Compare commits

...

10 Commits

Author SHA1 Message Date
Jarred Sumner
532749a275 Merge branch 'main' into dave/bom2 2024-01-22 17:22:03 -08:00
Jarred Sumner
d8e5f4d865 Merge branch 'main' into dave/bom2 2024-01-21 04:48:04 -08:00
autofix-ci[bot]
000d596b4e [autofix.ci] apply automated fixes 2024-01-17 23:34:27 +00:00
dave caruso
05684ef68e Merge remote-tracking branch 'origin/main' into dave/bom2 2024-01-17 15:33:01 -08:00
dave caruso
84924b7b7f fix stuff 2024-01-17 15:26:57 -08:00
dave caruso
c1ec43f96f fix submodule 2024-01-16 18:06:32 -08:00
dave caruso
1b89e71348 Merge remote-tracking branch 'origin/main' into dave/bom2 2024-01-16 18:06:09 -08:00
dave caruso
1739612bff ok 2024-01-16 18:00:38 -08:00
dave caruso
29270661ed asadfafd 2024-01-16 17:42:37 -08:00
dave caruso
a6371192db support utf16-le files 2024-01-16 16:38:50 -08:00
6 changed files with 75 additions and 15 deletions

View File

@@ -1535,7 +1535,7 @@ pub const Blob = struct {
this.ref_count += 1;
}
pub fn external(ptr: ?*anyopaque, _: ?*anyopaque, _: usize) callconv(.C) void {
pub fn externalOnDeref(ptr: ?*anyopaque, _: ?*anyopaque, _: usize) callconv(.C) void {
if (ptr == null) return;
var this = bun.cast(*Store, ptr);
this.deref();
@@ -4515,12 +4515,6 @@ pub const Blob = struct {
return ZigString.Empty.toValue(global);
}
if (bom == .utf16_le) {
var out = bun.String.createUTF16(bun.reinterpretSlice(u16, buf));
defer out.deref();
return out.toJS(global);
}
// null == unknown
// false == can't be
const could_be_all_ascii = this.is_all_ascii orelse this.store.?.is_all_ascii;
@@ -4551,27 +4545,46 @@ pub const Blob = struct {
// we don't need to clone
.clone => {
this.store.?.ref();
if (bom == .utf16_le) {
return ZigString.init16(bun.reinterpretSlice(u16, buf))
.external(global, this.store.?, Store.externalOnDeref);
}
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
return ZigString.init(buf).external(global, this.store.?, Store.external);
return ZigString.init(buf).external(global, this.store.?, Store.externalOnDeref);
},
.transfer => {
const store = this.store.?;
std.debug.assert(store.data == .bytes);
this.transfer();
if (bom == .utf16_le) {
return ZigString.init16(bun.reinterpretSlice(u16, buf))
.external(global, this.store.?, Store.externalOnDeref);
}
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
return ZigString.init(buf).external(global, store, Store.external);
return ZigString.init(buf).external(global, store, Store.externalOnDeref);
},
// strings are immutable
// sharing isn't really a thing
.share => {
this.store.?.ref();
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.s
return ZigString.init(buf).external(global, this.store.?, Store.external);
if (bom == .utf16_le) {
return ZigString.init16(bun.reinterpretSlice(u16, buf))
.external(global, this.store.?, Store.externalOnDeref);
}
// we don't need to worry about UTF-8 BOM in this case because the store owns the memory.
return ZigString.init(buf).external(global, this.store.?, Store.externalOnDeref);
},
.temporary => {
// if there was a UTF-8 BOM, we need to clone the buffer because
// if there was a BOM, we need to clone the buffer because
// external doesn't support this case here yet.
if (buf.len != raw_bytes.len) {
if (bom == .utf8) {
var out = bun.String.createLatin1(buf);
defer {
bun.default_allocator.free(raw_bytes);
@@ -4581,6 +4594,12 @@ pub const Blob = struct {
return out.toJS(global);
}
if (bom == .utf16_le) {
var out = bun.String.createUTF16(bun.reinterpretSlice(u16, buf));
defer out.deref();
return out.toJS(global);
}
return ZigString.init(buf).toExternalValue(global);
},
}
@@ -4615,15 +4634,15 @@ pub const Blob = struct {
if (buf.len == 0) return global.createSyntaxErrorInstance("Unexpected end of JSON input", .{});
if (bom == .utf16_le) {
if (comptime lifetime != .temporary) this.setIsASCIIFlag(true);
var out = bun.String.createUTF16(bun.reinterpretSlice(u16, buf));
defer out.deref();
return out.toJSByParseJSON(global);
}
// null == unknown
// false == can't be
const could_be_all_ascii = this.is_all_ascii orelse this.store.?.is_all_ascii;
defer if (comptime lifetime == .temporary) bun.default_allocator.free(@constCast(buf));
if (could_be_all_ascii == null or !could_be_all_ascii.?) {
var stack_fallback = std.heap.stackFallback(4096, bun.default_allocator);
const allocator = stack_fallback.get();

Binary file not shown.

View File

@@ -0,0 +1,10 @@
// Self-referential check: this test reads its own source file from disk, verifies
// that the raw bytes begin with EF BB BF (the UTF-8 byte-order mark), and then
// confirms that decoding the same file as text still yields its contents.
test("this file is utf8 with bom", async () => {
  const selfPath = import.meta.path;

  // Raw view first, so the leading bytes can be inspected before any decoding.
  const rawBuffer = await Bun.file(selfPath).arrayBuffer();
  expect(rawBuffer.byteLength).toBeGreaterThan(3);

  const rawBytes = new Uint8Array(rawBuffer);
  const expectedPrefix = [0xef, 0xbb, 0xbf];
  for (const [offset, expectedByte] of expectedPrefix.entries()) {
    expect(rawBytes[offset]).toBe(expectedByte);
  }

  // A fresh Bun.file handle is opened on purpose for the text read, mirroring two
  // independent reads of the same path. The needle below is matched against this
  // file's own source, so the literal must stay in the file.
  const decoded = await Bun.file(selfPath).text();
  expect(decoded).toInclude("this text right here");
});

View File

@@ -1,6 +1,7 @@
// @known-failing-on-windows: 1 failing
import { it, expect } from "bun:test";
import { tmpdir } from "node:os";
import { join } from "node:path";
it("offset should work in Bun.file() #4963", async () => {
const filename = tmpdir() + "/bun.test.offset.txt";
@@ -10,3 +11,30 @@ it("offset should work in Bun.file() #4963", async () => {
const contents = await slice.text();
expect(contents).toBe("ntents");
});
// Reads the UTF-16LE JSON fixture that sits next to this test file, checks that its
// raw bytes start with FF FE (the UTF-16 little-endian byte-order mark), and then
// checks that Bun.file(...).json() parses it into the expected object.
it("should be able to parse utf16le json", async () => {
  const fixturePath = join(import.meta.dir, "./json-utf16le.json");

  // Inspect the undecoded bytes before asking Bun to parse anything.
  const rawBuffer = await Bun.file(fixturePath).arrayBuffer();
  expect(rawBuffer.byteLength).toBeGreaterThan(3);

  const rawBytes = new Uint8Array(rawBuffer);
  const expectedPrefix = [0xff, 0xfe];
  for (const [offset, expectedByte] of expectedPrefix.entries()) {
    expect(rawBytes[offset]).toBe(expectedByte);
  }

  // A separate Bun.file handle is used for the parse, as two independent reads.
  const parsed = await Bun.file(fixturePath).json();
  expect(parsed).toEqual({ data: "i am utf16 spooky" });
});
// Reads the UTF-8-with-BOM JSON fixture that sits next to this test file, checks that
// its raw bytes start with EF BB BF (the UTF-8 byte-order mark), and then checks that
// Bun.file(...).json() parses it into the expected object.
it("should be able to parse utf8bom json", async () => {
  const fixturePath = join(import.meta.dir, "./json-utf8bom.json");

  // Inspect the undecoded bytes before asking Bun to parse anything.
  const rawBuffer = await Bun.file(fixturePath).arrayBuffer();
  expect(rawBuffer.byteLength).toBeGreaterThan(3);

  const rawBytes = new Uint8Array(rawBuffer);
  const expectedPrefix = [0xef, 0xbb, 0xbf];
  for (const [offset, expectedByte] of expectedPrefix.entries()) {
    expect(rawBytes[offset]).toBe(expectedByte);
  }

  // A separate Bun.file handle is used for the parse, as two independent reads.
  const parsed = await Bun.file(fixturePath).json();
  expect(parsed).toEqual({ data: "i am utf8 spooky" });
});

Binary file not shown.

View File

@@ -0,0 +1,3 @@
{
"data": "i am utf8 spooky"
}