From 9fcf30f661e752e2c44df84286ae402349d8c4ff Mon Sep 17 00:00:00 2001
From: Claude Bot
Date: Sun, 1 Feb 2026 00:30:16 +0000
Subject: [PATCH] fix(blob): handle UTF-8 paths correctly in stat() and delete() for Buffer paths

Use `ZigString.initUTF8()` instead of `ZigString.init()` when handling
non-encoded_slice PathLike variants. The latter doesn't mark bytes as
UTF-8, causing `toSliceClone()` to treat them as Latin1 and double-encode
to UTF-8.

Co-Authored-By: Claude Opus 4.5
---
 src/bun.js/webcore/Blob.zig         |  2 +-
 src/bun.js/webcore/blob/Store.zig   |  2 +-
 test/regression/issue/26647.test.ts | 71 +++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 2 deletions(-)
 create mode 100644 test/regression/issue/26647.test.ts

diff --git a/src/bun.js/webcore/Blob.zig b/src/bun.js/webcore/Blob.zig
index 28056fd2e0..63718d31fe 100644
--- a/src/bun.js/webcore/Blob.zig
+++ b/src/bun.js/webcore/Blob.zig
@@ -3134,7 +3134,7 @@ pub fn getStat(this: *Blob, globalThis: *jsc.JSGlobalObject, callback: *jsc.Call
                             .encoded_slice = switch (path_like) {
                                 // it's already converted to utf8
                                 .encoded_slice => |slice| try slice.toOwned(bun.default_allocator),
-                                else => try ZigString.init(path_like.slice()).toSliceClone(bun.default_allocator),
+                                else => try ZigString.initUTF8(path_like.slice()).toSliceClone(bun.default_allocator),
                             },
                         },
                     }, globalThis.bunVM());
diff --git a/src/bun.js/webcore/blob/Store.zig b/src/bun.js/webcore/blob/Store.zig
index 7de3d04c79..cc2b0e1fc0 100644
--- a/src/bun.js/webcore/blob/Store.zig
+++ b/src/bun.js/webcore/blob/Store.zig
@@ -263,7 +263,7 @@ pub const File = struct {
                 .path = .{
                     .encoded_slice = switch (path_like) {
                         .encoded_slice => |slice| try slice.toOwned(bun.default_allocator),
-                        else => try jsc.ZigString.init(path_like.slice()).toSliceClone(bun.default_allocator),
+                        else => try jsc.ZigString.initUTF8(path_like.slice()).toSliceClone(bun.default_allocator),
                     },
                 },
             }, globalThis.bunVM()),
diff --git a/test/regression/issue/26647.test.ts b/test/regression/issue/26647.test.ts
new file mode 100644
index 0000000000..3f49fe733a
--- /dev/null
+++ b/test/regression/issue/26647.test.ts
@@ -0,0 +1,71 @@
+import { expect, test } from "bun:test";
+import { tempDir } from "harness";
+
+// https://github.com/oven-sh/bun/issues/26647
+// Bun.file().stat() and Bun.file().delete() corrupt UTF-8 paths with non-ASCII
+// characters when the path is passed as a Buffer.
+
+test("Bun.file() with Buffer path handles UTF-8 correctly for stat()", async () => {
+  using dir = tempDir("test-26647", {
+    "über.txt": "content",
+  });
+
+  const filepath = `${dir}/über.txt`;
+
+  // Verify the file exists first using string path
+  const bunFile1 = Bun.file(filepath);
+  const stat1 = await bunFile1.stat();
+  expect(stat1.size).toBe(7); // "content" is 7 bytes
+
+  // Now test with Buffer path - this was failing before the fix
+  const bufPath = Buffer.from(filepath, "utf8");
+  const bunFile2 = Bun.file(bufPath);
+  const stat2 = await bunFile2.stat();
+  expect(stat2.size).toBe(7);
+});
+
+test("Bun.file() with Buffer path handles UTF-8 correctly for delete()", async () => {
+  using dir = tempDir("test-26647", {
+    "über.txt": "content",
+  });
+
+  const filepath = `${dir}/über.txt`;
+
+  // Test delete() with Buffer path - this was failing before the fix
+  const bufPath = Buffer.from(filepath, "utf8");
+  const bunFile = Bun.file(bufPath);
+
+  // Verify file exists before delete
+  const stat = await bunFile.stat();
+  expect(stat.size).toBe(7);
+
+  // Delete should succeed
+  await bunFile.delete();
+
+  // Verify file no longer exists
+  const exists = await Bun.file(filepath).exists();
+  expect(exists).toBe(false);
+});
+
+test("Bun.file() with Buffer path handles various UTF-8 characters", async () => {
+  using dir = tempDir("test-26647", {
+    "日本語.txt": "japanese",
+    "émoji🎉.txt": "emoji",
+    "中文测试.txt": "chinese",
+  });
+
+  // Test Japanese filename
+  const jpPath = Buffer.from(`${dir}/日本語.txt`, "utf8");
+  const jpStat = await Bun.file(jpPath).stat();
+  expect(jpStat.size).toBe(8); // "japanese" is 8 bytes
+
+  // Test emoji filename
+  const emojiPath = Buffer.from(`${dir}/émoji🎉.txt`, "utf8");
+  const emojiStat = await Bun.file(emojiPath).stat();
+  expect(emojiStat.size).toBe(5); // "emoji" is 5 bytes
+
+  // Test Chinese filename
+  const cnPath = Buffer.from(`${dir}/中文测试.txt`, "utf8");
+  const cnStat = await Bun.file(cnPath).stat();
+  expect(cnStat.size).toBe(7); // "chinese" is 7 bytes
+});