diff --git a/bench/bun.lock b/bench/bun.lock index ba5ad596f1..c668be9a0a 100644 --- a/bench/bun.lock +++ b/bench/bun.lock @@ -23,6 +23,7 @@ "react-dom": "^18.3.1", "string-width": "7.1.0", "strip-ansi": "^7.1.0", + "tar": "^7.4.3", "tinycolor2": "^1.6.0", "zx": "^7.2.3", }, @@ -108,6 +109,8 @@ "@fastify/proxy-addr": ["@fastify/proxy-addr@5.0.0", "", { "dependencies": { "@fastify/forwarded": "^3.0.0", "ipaddr.js": "^2.1.0" } }, "sha512-37qVVA1qZ5sgH7KpHkkC4z9SK6StIsIcOmpjvMPXNb3vx2GQxhZocogVYbr2PbbeLCQxYIPDok307xEvRZOzGA=="], + "@isaacs/fs-minipass": ["@isaacs/fs-minipass@4.0.1", "", { "dependencies": { "minipass": "^7.0.4" } }, "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w=="], + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.1.1", "", { "dependencies": { "@jridgewell/set-array": "^1.0.0", "@jridgewell/sourcemap-codec": "^1.4.10" } }, "sha512-sQXCasFk+U8lWYEe66WxRDOE9PjVz4vSM51fTu3Hw+ClTpUSQb718772vH3pyS5pShp6lvQM7SxgIDXXXmOX7w=="], "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.0", "", {}, "sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w=="], @@ -182,6 +185,8 @@ "chalk": ["chalk@5.3.0", "", {}, "sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w=="], + "chownr": ["chownr@3.0.0", "", {}, "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g=="], + "color": ["color@4.2.3", "", { "dependencies": { "color-convert": "^2.0.1", "color-string": "^1.9.0" } }, "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A=="], "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], @@ -362,6 +367,10 @@ "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], + "minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="], + + "minizlib": ["minizlib@3.1.0", "", { "dependencies": { "minipass": "^7.1.2" } }, "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw=="], + "mitata": ["mitata@1.0.25", "", {}, "sha512-0v5qZtVW5vwj9FDvYfraR31BMDcRLkhSFWPTLaxx/Z3/EvScfVtAAWtMI2ArIbBcwh7P86dXh0lQWKiXQPlwYA=="], "ms": ["ms@2.1.2", "", {}, "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="], @@ -458,6 +467,8 @@ "supports-color": ["supports-color@5.5.0", "", { "dependencies": { "has-flag": "^3.0.0" } }, "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow=="], + "tar": ["tar@7.5.2", "", { "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", "minizlib": "^3.1.0", "yallist": "^5.0.0" } }, "sha512-7NyxrTE4Anh8km8iEy7o0QYPs+0JKBTj5ZaqHg6B39erLg0qYXN3BijtShwbsNSvQ+LN75+KV+C4QR/f6Gwnpg=="], + "thread-stream": ["thread-stream@3.1.0", "", { "dependencies": { "real-require": "^0.2.0" } }, "sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A=="], "through": ["through@2.3.8", "", {}, "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg=="], @@ -482,7 +493,7 @@ "which": ["which@3.0.1", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": 
"bin/which.js" } }, "sha512-XA1b62dzQzLfaEOSQFTCOd5KFf/1VSzZo7/7TUjnya6u0vGGKzU96UQBZTAThCb2j4/xjBAyii1OhRLJEivHvg=="], - "yallist": ["yallist@3.1.1", "", {}, "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="], + "yallist": ["yallist@5.0.0", "", {}, "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw=="], "yaml": ["yaml@2.3.4", "", {}, "sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA=="], @@ -502,6 +513,8 @@ "light-my-request/process-warning": ["process-warning@4.0.1", "", {}, "sha512-3c2LzQ3rY9d0hc1emcsHhfT9Jwz0cChib/QN89oME2R451w5fy3f0afAhERFZAwrbDU43wk12d0ORBpDVME50Q=="], + "lru-cache/yallist": ["yallist@3.1.1", "", {}, "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="], + "npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], "ansi-styles/color-convert/color-name": ["color-name@1.1.3", "", {}, "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw=="], diff --git a/bench/package.json b/bench/package.json index b65de87a31..9babdc1e89 100644 --- a/bench/package.json +++ b/bench/package.json @@ -19,6 +19,7 @@ "react-dom": "^18.3.1", "string-width": "7.1.0", "strip-ansi": "^7.1.0", + "tar": "^7.4.3", "tinycolor2": "^1.6.0", "zx": "^7.2.3" }, diff --git a/bench/snippets/archive.mjs b/bench/snippets/archive.mjs new file mode 100644 index 0000000000..778a2e888b --- /dev/null +++ b/bench/snippets/archive.mjs @@ -0,0 +1,374 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Pack, Unpack } from "tar"; +import { bench, group, run } from "../runner.mjs"; + +// Check if Bun.Archive is available +const hasBunArchive = typeof Bun !== "undefined" && typeof Bun.Archive !== "undefined"; + +// Test data sizes +const smallContent = "Hello, World!"; +const mediumContent = Buffer.alloc(10 * 1024, "x").toString(); // 10KB +const largeContent = Buffer.alloc(100 * 1024, "x").toString(); // 100KB + +// Create test files for node-tar (it reads from filesystem) +const setupDir = mkdtempSync(join(tmpdir(), "archive-bench-setup-")); + +function setupNodeTarFiles(prefix, files) { + const dir = join(setupDir, prefix); + mkdirSync(dir, { recursive: true }); + for (const [name, content] of Object.entries(files)) { + const filePath = join(dir, name); + const fileDir = join(filePath, ".."); + mkdirSync(fileDir, { recursive: true }); + writeFileSync(filePath, content); + } + return dir; +} + +// Setup directories for different test cases +const smallFilesDir = setupNodeTarFiles("small", { + "file1.txt": smallContent, + "file2.txt": smallContent, + "file3.txt": smallContent, +}); + +const mediumFilesDir = setupNodeTarFiles("medium", { + "file1.txt": mediumContent, + "file2.txt": mediumContent, + "file3.txt": mediumContent, +}); + +const largeFilesDir = setupNodeTarFiles("large", { + "file1.txt": largeContent, + "file2.txt": largeContent, + "file3.txt": largeContent, +}); + +const manyFilesEntries = {}; +for (let i = 0; i < 100; i++) { + manyFilesEntries[`file${i}.txt`] = smallContent; +} +const manyFilesDir = setupNodeTarFiles("many", manyFilesEntries); + +// Pre-create archives for extraction benchmarks +let smallTarGzBuffer, mediumTarGzBuffer, largeTarGzBuffer, manyFilesTarGzBuffer; +let 
smallTarBuffer, mediumTarBuffer, largeTarBuffer, manyFilesTarBuffer; +let smallBunArchiveGz, mediumBunArchiveGz, largeBunArchiveGz, manyFilesBunArchiveGz; +let smallBunArchive, mediumBunArchive, largeBunArchive, manyFilesBunArchive; + +// Create tar buffer using node-tar (with optional gzip) +async function createNodeTarBuffer(cwd, files, gzip = false) { + return new Promise(resolve => { + const pack = new Pack({ cwd, gzip }); + const bufs = []; + pack.on("data", chunk => bufs.push(chunk)); + pack.on("end", () => resolve(Buffer.concat(bufs))); + for (const file of files) { + pack.add(file); + } + pack.end(); + }); +} + +// Extract tar buffer using node-tar +async function extractNodeTarBuffer(buffer, cwd) { + return new Promise((resolve, reject) => { + const unpack = new Unpack({ cwd }); + unpack.on("end", resolve); + unpack.on("error", reject); + unpack.end(buffer); + }); +} + +// Initialize gzipped archives +smallTarGzBuffer = await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); +mediumTarGzBuffer = await createNodeTarBuffer(mediumFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); +largeTarGzBuffer = await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); +manyFilesTarGzBuffer = await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), true); + +// Initialize uncompressed archives +smallTarBuffer = await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); +mediumTarBuffer = await createNodeTarBuffer(mediumFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); +largeTarBuffer = await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); +manyFilesTarBuffer = await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), false); + +const smallFiles = { "file1.txt": smallContent, "file2.txt": smallContent, "file3.txt": smallContent }; +const mediumFiles = { "file1.txt": mediumContent, "file2.txt": mediumContent, "file3.txt": mediumContent }; +const largeFiles = { "file1.txt": largeContent, "file2.txt": largeContent, "file3.txt": largeContent }; + +if (hasBunArchive) { + smallBunArchiveGz = await Bun.Archive.from(smallFiles).bytes("gzip"); + mediumBunArchiveGz = await Bun.Archive.from(mediumFiles).bytes("gzip"); + largeBunArchiveGz = await Bun.Archive.from(largeFiles).bytes("gzip"); + manyFilesBunArchiveGz = await Bun.Archive.from(manyFilesEntries).bytes("gzip"); + + smallBunArchive = await Bun.Archive.from(smallFiles).bytes(); + mediumBunArchive = await Bun.Archive.from(mediumFiles).bytes(); + largeBunArchive = await Bun.Archive.from(largeFiles).bytes(); + manyFilesBunArchive = await Bun.Archive.from(manyFilesEntries).bytes(); +} + +// Create reusable extraction directories (overwriting is fine) +const extractDirNodeTar = mkdtempSync(join(tmpdir(), "archive-bench-extract-node-")); +const extractDirBun = mkdtempSync(join(tmpdir(), "archive-bench-extract-bun-")); +const writeDirNodeTar = mkdtempSync(join(tmpdir(), "archive-bench-write-node-")); +const writeDirBun = mkdtempSync(join(tmpdir(), "archive-bench-write-bun-")); + +// ============================================================================ +// Create .tar (uncompressed) benchmarks +// ============================================================================ + +group("create .tar (3 small files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); + }); + + if (hasBunArchive) { + 
bench("Bun.Archive", async () => { + await Bun.Archive.from(smallFiles).bytes(); + }); + } +}); + +group("create .tar (3 x 100KB files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(largeFiles).bytes(); + }); + } +}); + +group("create .tar (100 small files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), false); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(manyFilesEntries).bytes(); + }); + } +}); + +// ============================================================================ +// Create .tar.gz (compressed) benchmarks +// ============================================================================ + +group("create .tar.gz (3 small files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(smallFiles).bytes("gzip"); + }); + } +}); + +group("create .tar.gz (3 x 100KB files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(largeFiles).bytes("gzip"); + }); + } +}); + +group("create .tar.gz (100 small files)", () => { + bench("node-tar", async () => { + await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), true); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(manyFilesEntries).bytes("gzip"); + }); + } +}); + +// ============================================================================ +// Extract .tar (uncompressed) benchmarks +// ============================================================================ + +group("extract .tar (3 small files)", () => { + bench("node-tar", async () => { + await extractNodeTarBuffer(smallTarBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(smallBunArchive).extract(extractDirBun); + }); + } +}); + +group("extract .tar (3 x 100KB files)", () => { + bench("node-tar", async () => { + await extractNodeTarBuffer(largeTarBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(largeBunArchive).extract(extractDirBun); + }); + } +}); + +group("extract .tar (100 small files)", () => { + bench("node-tar", async () => { + await extractNodeTarBuffer(manyFilesTarBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(manyFilesBunArchive).extract(extractDirBun); + }); + } +}); + +// ============================================================================ +// Extract .tar.gz (compressed) benchmarks +// ============================================================================ + +group("extract .tar.gz (3 small files)", () => { + bench("node-tar", async () => { + await extractNodeTarBuffer(smallTarGzBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(smallBunArchiveGz).extract(extractDirBun); + }); + } +}); + +group("extract .tar.gz (3 x 100KB files)", () => { + bench("node-tar", async () => { + 
await extractNodeTarBuffer(largeTarGzBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(largeBunArchiveGz).extract(extractDirBun); + }); + } +}); + +group("extract .tar.gz (100 small files)", () => { + bench("node-tar", async () => { + await extractNodeTarBuffer(manyFilesTarGzBuffer, extractDirNodeTar); + }); + + if (hasBunArchive) { + bench("Bun.Archive", async () => { + await Bun.Archive.from(manyFilesBunArchiveGz).extract(extractDirBun); + }); + } +}); + +// ============================================================================ +// Write .tar to disk benchmarks +// ============================================================================ + +let writeCounter = 0; + +group("write .tar to disk (3 small files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar`), smallFiles); + }); + } +}); + +group("write .tar to disk (3 x 100KB files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], false); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar`), largeFiles); + }); + } +}); + +group("write .tar to disk (100 small files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), false); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar`), manyFilesEntries); + }); + } +}); + +// ============================================================================ +// Write .tar.gz to disk benchmarks +// ============================================================================ + +group("write .tar.gz to disk (3 small files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(smallFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar.gz`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar.gz`), smallFiles, "gzip"); + }); + } +}); + +group("write .tar.gz to disk (3 x 100KB files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(largeFilesDir, ["file1.txt", "file2.txt", "file3.txt"], true); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar.gz`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar.gz`), largeFiles, "gzip"); + }); + } +}); + +group("write .tar.gz to disk (100 small files)", () => { + bench("node-tar + writeFileSync", async () => { + const buffer = await createNodeTarBuffer(manyFilesDir, Object.keys(manyFilesEntries), 
true); + writeFileSync(join(writeDirNodeTar, `archive-${writeCounter++}.tar.gz`), buffer); + }); + + if (hasBunArchive) { + bench("Bun.Archive.write", async () => { + await Bun.Archive.write(join(writeDirBun, `archive-${writeCounter++}.tar.gz`), manyFilesEntries, "gzip"); + }); + } +}); + +await run(); + +// Cleanup +rmSync(setupDir, { recursive: true, force: true }); +rmSync(extractDirNodeTar, { recursive: true, force: true }); +rmSync(extractDirBun, { recursive: true, force: true }); +rmSync(writeDirNodeTar, { recursive: true, force: true }); +rmSync(writeDirBun, { recursive: true, force: true }); diff --git a/src/bun.js/ConsoleObject.zig b/src/bun.js/ConsoleObject.zig index 491689ecc4..7ad178704f 100644 --- a/src/bun.js/ConsoleObject.zig +++ b/src/bun.js/ConsoleObject.zig @@ -2596,6 +2596,9 @@ pub const Formatter = struct { } else if (value.as(jsc.WebCore.S3Client)) |s3client| { s3client.writeFormat(ConsoleObject.Formatter, this, writer_, enable_ansi_colors) catch {}; return; + } else if (value.as(jsc.API.Archive)) |archive| { + archive.writeFormat(ConsoleObject.Formatter, this, writer_, enable_ansi_colors) catch {}; + return; } else if (value.as(bun.webcore.FetchHeaders) != null) { if (try value.get(this.globalThis, "toJSON")) |toJSONFunction| { this.addForNewLine("Headers ".len); diff --git a/src/bun.js/api.zig b/src/bun.js/api.zig index 1b54ea0daf..e09d9c6bfc 100644 --- a/src/bun.js/api.zig +++ b/src/bun.js/api.zig @@ -38,6 +38,7 @@ pub const dns = @import("./api/bun/dns.zig"); pub const FFI = @import("./api/ffi.zig").FFI; pub const HTMLRewriter = @import("./api/html_rewriter.zig"); pub const FileSystemRouter = @import("./api/filesystem_router.zig").FileSystemRouter; +pub const Archive = @import("./api/Archive.zig"); pub const Glob = @import("./api/glob.zig"); pub const H2FrameParser = @import("./api/bun/h2_frame_parser.zig").H2FrameParser; pub const JSBundler = @import("./api/JSBundler.zig").JSBundler; diff --git a/src/bun.js/api/Archive.classes.ts b/src/bun.js/api/Archive.classes.ts new file mode 100644 index 0000000000..113c688a07 --- /dev/null +++ b/src/bun.js/api/Archive.classes.ts @@ -0,0 +1,39 @@ +import { define } from "../../codegen/class-definitions"; + +export default [ + define({ + name: "Archive", + construct: true, + finalize: true, + configurable: false, + JSType: "0b11101110", + klass: { + from: { + fn: "from", + length: 1, + }, + write: { + fn: "write", + length: 2, + }, + }, + proto: { + extract: { + fn: "extract", + length: 1, + }, + blob: { + fn: "blob", + length: 0, + }, + bytes: { + fn: "bytes", + length: 0, + }, + files: { + fn: "files", + length: 0, + }, + }, + }), +]; diff --git a/src/bun.js/api/Archive.zig b/src/bun.js/api/Archive.zig new file mode 100644 index 0000000000..5bf7a07f66 --- /dev/null +++ b/src/bun.js/api/Archive.zig @@ -0,0 +1,807 @@ +const Archive = @This(); + +pub const js = jsc.Codegen.JSArchive; +pub const toJS = js.toJS; +pub const fromJS = js.fromJS; +pub const fromJSDirect = js.fromJSDirect; + +/// The underlying data for the archive - uses Blob.Store for thread-safe ref counting +store: *jsc.WebCore.Blob.Store, + +pub fn finalize(this: *Archive) void { + jsc.markBinding(@src()); + this.store.deref(); + bun.destroy(this); +} + +/// Pretty-print for console.log +pub fn writeFormat(this: *const Archive, comptime Formatter: type, formatter: *Formatter, writer: anytype, comptime enable_ansi_colors: bool) !void { + const Writer = @TypeOf(writer); + const Output = bun.Output; + const data = this.store.sharedView(); + + try 
writer.print(comptime Output.prettyFmt("Archive ({f}) {{\n", enable_ansi_colors), .{bun.fmt.size(data.len, .{})}); + + { + formatter.indent += 1; + defer formatter.indent -|= 1; + + try formatter.writeIndent(Writer, writer); + try writer.writeAll(comptime Output.prettyFmt("files: ", enable_ansi_colors)); + try formatter.printAs(.Double, Writer, writer, jsc.JSValue.jsNumber(countFilesInArchive(data)), .NumberObject, enable_ansi_colors); + } + try writer.writeAll("\n"); + try formatter.writeIndent(Writer, writer); + try writer.writeAll("}"); + formatter.resetLine(); +} + +/// Configure archive for reading tar/tar.gz +fn configureArchiveReader(archive: *libarchive.lib.Archive) void { + _ = archive.readSupportFormatTar(); + _ = archive.readSupportFormatGnutar(); + _ = archive.readSupportFilterGzip(); + _ = archive.readSetOptions("read_concatenated_archives"); +} + +/// Count the number of files in an archive +fn countFilesInArchive(data: []const u8) u32 { + const archive = libarchive.lib.Archive.readNew(); + defer _ = archive.readFree(); + configureArchiveReader(archive); + + if (archive.readOpenMemory(data) != .ok) { + return 0; + } + + var count: u32 = 0; + var entry: *libarchive.lib.Archive.Entry = undefined; + while (archive.readNextHeader(&entry) == .ok) { + if (entry.filetype() == @intFromEnum(libarchive.lib.FileType.regular)) { + count += 1; + } + } + + return count; +} + +/// Constructor: new Archive() - throws an error since users should use Archive.from() +pub fn constructor(globalThis: *jsc.JSGlobalObject, _: *jsc.CallFrame) bun.JSError!*Archive { + return globalThis.throwInvalidArguments("Archive cannot be constructed directly. Use Archive.from() instead.", .{}); +} + +/// Static method: Archive.from(data) +/// Creates an Archive from either: +/// - An object { [path: string]: Blob | string | ArrayBufferView | ArrayBufferLike } +/// - A Blob, ArrayBufferView, or ArrayBufferLike (assumes it's already a valid archive) +pub fn from(globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue { + const arg = callframe.argumentsAsArray(1)[0]; + if (arg == .zero) { + return globalThis.throwInvalidArguments("Archive.from requires an argument", .{}); + } + + // For Blob/Archive, ref the existing store (zero-copy) + if (arg.as(jsc.WebCore.Blob)) |blob_ptr| { + if (blob_ptr.store) |store| { + store.ref(); + return bun.new(Archive, .{ .store = store }).toJS(globalThis); + } + } + + // For ArrayBuffer/TypedArray, copy the data + if (arg.asArrayBuffer(globalThis)) |array_buffer| { + const data = try bun.default_allocator.dupe(u8, array_buffer.slice()); + return createArchive(globalThis, data); + } + + // For plain objects, build a tarball + if (arg.isObject()) { + const data = try buildTarballFromObject(globalThis, arg); + return createArchive(globalThis, data); + } + + return globalThis.throwInvalidArguments("Expected an object, Blob, TypedArray, or ArrayBuffer", .{}); +} + +fn createArchive(globalThis: *jsc.JSGlobalObject, data: []u8) jsc.JSValue { + const store = jsc.WebCore.Blob.Store.init(data, bun.default_allocator); + return bun.new(Archive, .{ .store = store }).toJS(globalThis); +} + +/// Shared helper that builds tarball bytes from a JS object +fn buildTarballFromObject(globalThis: *jsc.JSGlobalObject, obj: jsc.JSValue) bun.JSError![]u8 { + const allocator = bun.default_allocator; + const lib = libarchive.lib; + + const js_obj = obj.getObject() orelse { + return globalThis.throwInvalidArguments("Expected an object", .{}); + }; + + // Set up archive first + var 
growing_buffer = lib.GrowingBuffer.init(allocator); + errdefer growing_buffer.deinit(); + + const archive = lib.Archive.writeNew(); + defer _ = archive.writeFree(); + + if (archive.writeSetFormatPaxRestricted() != .ok) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveFormatError", .{}); + } + + if (lib.archive_write_open2( + @ptrCast(archive), + @ptrCast(&growing_buffer), + &lib.GrowingBuffer.openCallback, + &lib.GrowingBuffer.writeCallback, + &lib.GrowingBuffer.closeCallback, + null, + ) != 0) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveOpenError", .{}); + } + + const entry = lib.Archive.Entry.new(); + defer entry.free(); + + const now_secs: isize = @intCast(@divTrunc(std.time.milliTimestamp(), 1000)); + + // Iterate over object properties and write directly to archive + const PropIterator = jsc.JSPropertyIterator(.{ + .skip_empty_name = true, + .include_value = true, + }); + + var iter = try PropIterator.init(globalThis, js_obj); + defer iter.deinit(); + + while (try iter.next()) |key| { + const value = iter.value; + if (value == .zero) continue; + + // Get the key as a null-terminated string + const key_slice = key.toUTF8(allocator); + defer key_slice.deinit(); + const key_str = try allocator.dupeZ(u8, key_slice.slice()); + defer allocator.free(key_str); + + // Get data - use view for Blob/ArrayBuffer, convert for strings + const data_slice = try getEntryData(globalThis, value, allocator); + defer data_slice.deinit(); + + // Write entry to archive + const data = data_slice.slice(); + _ = entry.clear(); + entry.setPathnameUtf8(key_str); + entry.setSize(@intCast(data.len)); + entry.setFiletype(@intFromEnum(lib.FileType.regular)); + entry.setPerm(0o644); + entry.setMtime(now_secs, 0); + + if (archive.writeHeader(entry) != .ok) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveHeaderError", .{}); + } + if (archive.writeData(data) < 0) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveWriteError", .{}); + } + if (archive.writeFinishEntry() != .ok) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveFinishEntryError", .{}); + } + } + + if (archive.writeClose() != .ok) { + return globalThis.throwInvalidArguments("Failed to create tarball: ArchiveCloseError", .{}); + } + + return growing_buffer.toOwnedSlice() catch { + return globalThis.throwInvalidArguments("Failed to create tarball: OutOfMemory", .{}); + }; +} + +/// Returns data as a ZigString.Slice (handles ownership automatically via deinit) +fn getEntryData(globalThis: *jsc.JSGlobalObject, value: jsc.JSValue, allocator: std.mem.Allocator) bun.JSError!jsc.ZigString.Slice { + // For Blob, use sharedView (no copy needed) + if (value.as(jsc.WebCore.Blob)) |blob_ptr| { + return jsc.ZigString.Slice.fromUTF8NeverFree(blob_ptr.sharedView()); + } + + // For ArrayBuffer/TypedArray, use view (no copy needed) + if (value.asArrayBuffer(globalThis)) |array_buffer| { + return jsc.ZigString.Slice.fromUTF8NeverFree(array_buffer.slice()); + } + + // For strings, convert (allocates) + return value.toSlice(globalThis, allocator); +} + +/// Static method: Archive.write(path, data, compress?) 
+/// Creates and writes an archive to disk in one operation +pub fn write(globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue { + const path_arg, const data_arg, const compress_arg = callframe.argumentsAsArray(3); + if (data_arg == .zero) { + return globalThis.throwInvalidArguments("Archive.write requires at least 2 arguments (path, data)", .{}); + } + + // Get the path + if (!path_arg.isString()) { + return globalThis.throwInvalidArguments("Archive.write: first argument must be a string path", .{}); + } + + const path_slice = try path_arg.toSlice(globalThis, bun.default_allocator); + defer path_slice.deinit(); + + // Determine compression + const use_gzip = try parseCompressArg(globalThis, compress_arg); + + // Try to use store reference (zero-copy) for Archive/Blob + if (fromJS(data_arg)) |archive| { + return startWriteTask(globalThis, .{ .store = archive.store }, path_slice.slice(), use_gzip); + } + + if (data_arg.as(jsc.WebCore.Blob)) |blob_ptr| { + if (blob_ptr.store) |store| { + return startWriteTask(globalThis, .{ .store = store }, path_slice.slice(), use_gzip); + } + } + + // Fall back to copying data for ArrayBuffer/TypedArray/objects + const archive_data = try getArchiveData(globalThis, data_arg); + return startWriteTask(globalThis, .{ .owned = archive_data }, path_slice.slice(), use_gzip); +} + +/// Get archive data from a value, returning owned bytes +fn getArchiveData(globalThis: *jsc.JSGlobalObject, arg: jsc.JSValue) bun.JSError![]u8 { + // Check if it's a typed array, ArrayBuffer, or similar + if (arg.asArrayBuffer(globalThis)) |array_buffer| { + return bun.default_allocator.dupe(u8, array_buffer.slice()); + } + + // Check if it's an object with entries (plain object) - build tarball + if (arg.isObject()) { + return buildTarballFromObject(globalThis, arg); + } + + return globalThis.throwInvalidArguments("Expected an object, Blob, TypedArray, ArrayBuffer, or Archive", .{}); +} + +fn parseCompressArg(globalThis: *jsc.JSGlobalObject, arg: jsc.JSValue) bun.JSError!bool { + if (arg.isUndefinedOrNull()) { + return false; + } + + if (arg.isBoolean()) { + return arg.toBoolean(); + } + + if (arg.isString()) { + const str = try arg.toSlice(globalThis, bun.default_allocator); + defer str.deinit(); + if (std.mem.eql(u8, str.slice(), "gzip")) { + return true; + } + return globalThis.throwInvalidArguments("Archive: compress argument must be 'gzip', a boolean, or undefined", .{}); + } + + return globalThis.throwInvalidArguments("Archive: compress argument must be 'gzip', a boolean, or undefined", .{}); +} + +/// Instance method: archive.extract(path) +/// Extracts the archive to the given path +/// Returns Promise with count of extracted files +pub fn extract(this: *Archive, globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue { + const path_arg = callframe.argumentsAsArray(1)[0]; + if (path_arg == .zero or !path_arg.isString()) { + return globalThis.throwInvalidArguments("Archive.extract requires a path argument", .{}); + } + + const path_slice = try path_arg.toSlice(globalThis, bun.default_allocator); + defer path_slice.deinit(); + + return startExtractTask(globalThis, this.store, path_slice.slice()); +} + +/// Instance method: archive.blob(compress?) 
+/// Returns Promise with the archive data
+pub fn blob(this: *Archive, globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
+    const compress_arg = callframe.argumentsAsArray(1)[0];
+    const use_gzip = try parseCompressArg(globalThis, compress_arg);
+    return startBlobTask(globalThis, this.store, use_gzip, .blob);
+}
+
+/// Instance method: archive.bytes(compress?)
+/// Returns Promise with the archive data
+pub fn bytes(this: *Archive, globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
+    const compress_arg = callframe.argumentsAsArray(1)[0];
+    const use_gzip = try parseCompressArg(globalThis, compress_arg);
+    return startBlobTask(globalThis, this.store, use_gzip, .bytes);
+}
+
+/// Instance method: archive.files(glob?)
+/// Returns Promise<Map<string, Blob>> with archive file contents
+pub fn files(this: *Archive, globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
+    const glob_arg = callframe.argument(0);
+
+    var glob_pattern: ?[]const u8 = null;
+
+    if (!glob_arg.isUndefinedOrNull()) {
+        if (!glob_arg.isString()) {
+            return globalThis.throwInvalidArguments("Archive.files: argument must be a string glob pattern or undefined", .{});
+        }
+        const glob_slice = try glob_arg.toSlice(globalThis, bun.default_allocator);
+        defer glob_slice.deinit();
+        glob_pattern = try bun.default_allocator.dupe(u8, glob_slice.slice());
+    }
+    errdefer if (glob_pattern) |p| bun.default_allocator.free(p);
+
+    return startFilesTask(globalThis, this.store, glob_pattern);
+}
+
+// ============================================================================
+// Generic Async Task Infrastructure
+// ============================================================================
+
+const PromiseResult = union(enum) {
+    resolve: jsc.JSValue,
+    reject: jsc.JSValue,
+
+    fn fulfill(this: PromiseResult, globalThis: *jsc.JSGlobalObject, promise: *jsc.JSPromise) bun.JSTerminated!void {
+        switch (this) {
+            .resolve => |v| try promise.resolve(globalThis, v),
+            .reject => |v| try promise.reject(globalThis, v),
+        }
+    }
+};
+
+/// Generic async task that handles all the boilerplate for thread pool tasks.
+/// Context must provide: +/// - `fn run(*Context) void` - runs on thread pool +/// - `fn runFromJS(*Context, *jsc.JSGlobalObject) PromiseResult` - returns value to resolve/reject +/// - `fn deinit(*Context) void` - cleanup +fn AsyncTask(comptime Context: type) type { + return struct { + const Self = @This(); + + ctx: Context, + promise: jsc.JSPromise.Strong, + vm: *jsc.VirtualMachine, + task: jsc.WorkPoolTask = .{ .callback = &run }, + concurrent_task: jsc.ConcurrentTask = .{}, + ref: bun.Async.KeepAlive = .{}, + + fn create(globalThis: *jsc.JSGlobalObject, ctx: Context) error{OutOfMemory}!*Self { + const vm = globalThis.bunVM(); + const self = bun.new(Self, .{ + .ctx = ctx, + .promise = jsc.JSPromise.Strong.init(globalThis), + .vm = vm, + }); + self.ref.ref(vm); + return self; + } + + fn schedule(this: *Self) void { + jsc.WorkPool.schedule(&this.task); + } + + fn run(work_task: *jsc.WorkPoolTask) void { + const this: *Self = @fieldParentPtr("task", work_task); + const result = Context.run(&this.ctx); + // Handle both error union and non-error union return types + this.ctx.result = if (@typeInfo(@TypeOf(result)) == .error_union) + result catch |err| .{ .err = err } + else + result; + this.vm.enqueueTaskConcurrent( + this.concurrent_task.from(this, .manual_deinit), + ); + } + + pub fn runFromJS(this: *Self) bun.JSTerminated!void { + this.ref.unref(this.vm); + + defer { + Context.deinit(&this.ctx); + bun.destroy(this); + } + + if (this.vm.isShuttingDown()) return; + + const globalThis = this.vm.global; + const promise = this.promise.swap(); + const result = Context.runFromJS(&this.ctx, globalThis) catch |e| { + // JSError means exception is already pending + return try promise.reject(globalThis, globalThis.takeException(e)); + }; + try result.fulfill(globalThis, promise); + } + }; +} + +// ============================================================================ +// Task Contexts +// ============================================================================ + +const ExtractContext = struct { + const Result = union(enum) { + success: u32, + err: error{ReadError}, + }; + + store: *jsc.WebCore.Blob.Store, + path: []const u8, + result: Result = .{ .err = error.ReadError }, + + fn run(this: *ExtractContext) Result { + const count = libarchive.Archiver.extractToDisk( + this.store.sharedView(), + this.path, + null, + void, + {}, + .{ .depth_to_skip = 0, .close_handles = true, .log = false, .npm = false }, + ) catch return .{ .err = error.ReadError }; + return .{ .success = count }; + } + + fn runFromJS(this: *ExtractContext, globalThis: *jsc.JSGlobalObject) bun.JSError!PromiseResult { + return switch (this.result) { + .success => |count| .{ .resolve = jsc.JSValue.jsNumber(count) }, + .err => |e| .{ .reject = globalThis.createErrorInstance("{s}", .{@errorName(e)}) }, + }; + } + + fn deinit(this: *ExtractContext) void { + this.store.deref(); + bun.default_allocator.free(this.path); + } +}; + +pub const ExtractTask = AsyncTask(ExtractContext); + +fn startExtractTask(globalThis: *jsc.JSGlobalObject, store: *jsc.WebCore.Blob.Store, path: []const u8) bun.JSError!jsc.JSValue { + const path_copy = try bun.default_allocator.dupe(u8, path); + errdefer bun.default_allocator.free(path_copy); + + store.ref(); + errdefer store.deref(); + + const task = try ExtractTask.create(globalThis, .{ + .store = store, + .path = path_copy, + }); + + const promise_js = task.promise.value(); + task.schedule(); + return promise_js; +} + +const BlobContext = struct { + const OutputType = enum { blob, bytes }; + const 
Error = error{ OutOfMemory, GzipInitFailed, GzipCompressFailed }; + const Result = union(enum) { + compressed: []u8, + uncompressed: void, + err: Error, + }; + + store: *jsc.WebCore.Blob.Store, + use_gzip: bool, + output_type: OutputType, + result: Result = .{ .uncompressed = {} }, + + fn run(this: *BlobContext) Result { + if (this.use_gzip) { + return .{ .compressed = compressGzip(this.store.sharedView()) catch |e| return .{ .err = e } }; + } + return .{ .uncompressed = {} }; + } + + fn runFromJS(this: *BlobContext, globalThis: *jsc.JSGlobalObject) bun.JSError!PromiseResult { + switch (this.result) { + .err => |e| return .{ .reject = globalThis.createErrorInstance("{s}", .{@errorName(e)}) }, + .compressed => |data| { + this.result = .{ .uncompressed = {} }; // Ownership transferred + return .{ .resolve = switch (this.output_type) { + .blob => jsc.WebCore.Blob.new(jsc.WebCore.Blob.createWithBytesAndAllocator(data, bun.default_allocator, globalThis, false)).toJS(globalThis), + .bytes => jsc.JSValue.createBuffer(globalThis, data), + } }; + }, + .uncompressed => return switch (this.output_type) { + .blob => blk: { + this.store.ref(); + break :blk .{ .resolve = jsc.WebCore.Blob.new(jsc.WebCore.Blob.initWithStore(this.store, globalThis)).toJS(globalThis) }; + }, + .bytes => .{ .resolve = jsc.JSValue.createBuffer(globalThis, bun.default_allocator.dupe(u8, this.store.sharedView()) catch return .{ .reject = globalThis.createOutOfMemoryError() }) }, + }, + } + } + + fn deinit(this: *BlobContext) void { + this.store.deref(); + if (this.result == .compressed) bun.default_allocator.free(this.result.compressed); + } +}; + +pub const BlobTask = AsyncTask(BlobContext); + +fn startBlobTask(globalThis: *jsc.JSGlobalObject, store: *jsc.WebCore.Blob.Store, use_gzip: bool, output_type: BlobContext.OutputType) bun.JSError!jsc.JSValue { + store.ref(); + errdefer store.deref(); + + const task = try BlobTask.create(globalThis, .{ + .store = store, + .use_gzip = use_gzip, + .output_type = output_type, + }); + + const promise_js = task.promise.value(); + task.schedule(); + return promise_js; +} + +const WriteContext = struct { + const Error = error{ OutOfMemory, GzipInitFailed, GzipCompressFailed }; + const Result = union(enum) { + success: void, + err: Error, + sys_err: bun.sys.Error, + }; + const Data = union(enum) { + owned: []u8, + store: *jsc.WebCore.Blob.Store, + }; + + data: Data, + path: [:0]const u8, + use_gzip: bool, + result: Result = .{ .success = {} }, + + fn run(this: *WriteContext) Result { + const source_data = switch (this.data) { + .owned => |d| d, + .store => |s| s.sharedView(), + }; + const data_to_write = if (this.use_gzip) + compressGzip(source_data) catch |e| return .{ .err = e } + else + source_data; + defer if (this.use_gzip) bun.default_allocator.free(data_to_write); + + const file = switch (bun.sys.File.openat(.cwd(), this.path, bun.O.CREAT | bun.O.WRONLY | bun.O.TRUNC, 0o644)) { + .err => |err| return .{ .sys_err = err.clone(bun.default_allocator) }, + .result => |f| f, + }; + defer file.close(); + + return switch (file.writeAll(data_to_write)) { + .err => |err| .{ .sys_err = err.clone(bun.default_allocator) }, + .result => .{ .success = {} }, + }; + } + + fn runFromJS(this: *WriteContext, globalThis: *jsc.JSGlobalObject) bun.JSError!PromiseResult { + return switch (this.result) { + .success => .{ .resolve = .js_undefined }, + .err => |e| .{ .reject = globalThis.createErrorInstance("{s}", .{@errorName(e)}) }, + .sys_err => |sys_err| .{ .reject = sys_err.toJS(globalThis) }, + }; + } + + 
fn deinit(this: *WriteContext) void { + switch (this.data) { + .owned => |d| bun.default_allocator.free(d), + .store => |s| s.deref(), + } + bun.default_allocator.free(this.path); + if (this.result == .sys_err) { + var sys_err = this.result.sys_err; + sys_err.deinit(); + } + } +}; + +pub const WriteTask = AsyncTask(WriteContext); + +fn startWriteTask( + globalThis: *jsc.JSGlobalObject, + data: WriteContext.Data, + path: []const u8, + use_gzip: bool, +) bun.JSError!jsc.JSValue { + const path_z = try bun.default_allocator.dupeZ(u8, path); + errdefer bun.default_allocator.free(path_z); + + // Ref store if using store reference + if (data == .store) { + data.store.ref(); + } + errdefer if (data == .store) data.store.deref(); + errdefer if (data == .owned) bun.default_allocator.free(data.owned); + + const task = try WriteTask.create(globalThis, .{ + .data = data, + .path = path_z, + .use_gzip = use_gzip, + }); + + const promise_js = task.promise.value(); + task.schedule(); + return promise_js; +} + +const FilesContext = struct { + const FileEntry = struct { path: []u8, data: []u8, mtime: i64 }; + const FileEntryList = std.ArrayList(FileEntry); + const Error = error{ OutOfMemory, ReadError }; + const Result = union(enum) { + success: FileEntryList, + libarchive_err: [*:0]u8, + err: Error, + + fn deinit(self: *Result) void { + switch (self.*) { + .libarchive_err => |s| bun.default_allocator.free(std.mem.span(s)), + .success => |*list| { + for (list.items) |e| { + bun.default_allocator.free(e.path); + if (e.data.len > 0) bun.default_allocator.free(e.data); + } + list.deinit(bun.default_allocator); + }, + .err => {}, + } + } + }; + + store: *jsc.WebCore.Blob.Store, + glob_pattern: ?[]const u8, + result: Result = .{ .err = error.ReadError }, + + fn cloneErrorString(archive: *libarchive.lib.Archive) ?[*:0]u8 { + const err_str = archive.errorString(); + if (err_str.len == 0) return null; + return bun.default_allocator.dupeZ(u8, err_str) catch null; + } + + fn run(this: *FilesContext) std.mem.Allocator.Error!Result { + const lib = libarchive.lib; + const archive = lib.Archive.readNew(); + defer _ = archive.readFree(); + configureArchiveReader(archive); + + if (archive.readOpenMemory(this.store.sharedView()) != .ok) { + return if (cloneErrorString(archive)) |err| .{ .libarchive_err = err } else .{ .err = error.ReadError }; + } + + var entries: FileEntryList = .empty; + errdefer { + for (entries.items) |e| { + bun.default_allocator.free(e.path); + if (e.data.len > 0) bun.default_allocator.free(e.data); + } + entries.deinit(bun.default_allocator); + } + + var entry: *lib.Archive.Entry = undefined; + while (archive.readNextHeader(&entry) == .ok) { + if (entry.filetype() != @intFromEnum(lib.FileType.regular)) continue; + + const pathname = entry.pathnameUtf8(); + if (this.glob_pattern) |pattern| { + if (!bun.glob.match(pattern, pathname).matches()) continue; + } + + const size: usize = @intCast(@max(entry.size(), 0)); + const mtime = entry.mtime(); + + // Read data first before allocating path + var data: []u8 = &.{}; + if (size > 0) { + data = try bun.default_allocator.alloc(u8, size); + var total_read: usize = 0; + while (total_read < size) { + const read = archive.readData(data[total_read..]); + if (read < 0) { + // Read error - not an allocation error, must free manually + bun.default_allocator.free(data); + return if (cloneErrorString(archive)) |err| .{ .libarchive_err = err } else .{ .err = error.ReadError }; + } + if (read == 0) break; + total_read += @intCast(read); + } + } + errdefer if (data.len > 
0) bun.default_allocator.free(data); + + const path_copy = try bun.default_allocator.dupe(u8, pathname); + errdefer bun.default_allocator.free(path_copy); + + try entries.append(bun.default_allocator, .{ .path = path_copy, .data = data, .mtime = mtime }); + } + + return .{ .success = entries }; + } + + fn runFromJS(this: *FilesContext, globalThis: *jsc.JSGlobalObject) bun.JSError!PromiseResult { + switch (this.result) { + .success => |*entries| { + const map = jsc.JSMap.create(globalThis); + const map_ptr = jsc.JSMap.fromJS(map) orelse { + return .{ .reject = globalThis.createErrorInstance("Failed to create Map", .{}) }; + }; + + for (entries.items) |*entry| { + const blob_ptr = jsc.WebCore.Blob.new(jsc.WebCore.Blob.createWithBytesAndAllocator(entry.data, bun.default_allocator, globalThis, false)); + entry.data = &.{}; // Ownership transferred + blob_ptr.is_jsdom_file = true; + blob_ptr.name = bun.String.cloneUTF8(entry.path); + blob_ptr.last_modified = @floatFromInt(entry.mtime * 1000); + + try map_ptr.set(globalThis, blob_ptr.name.toJS(globalThis), blob_ptr.toJS(globalThis)); + } + + return .{ .resolve = map }; + }, + .libarchive_err => |err_msg| return .{ .reject = globalThis.createErrorInstance("{s}", .{err_msg}) }, + .err => |e| return .{ .reject = globalThis.createErrorInstance("{s}", .{@errorName(e)}) }, + } + } + + fn deinit(this: *FilesContext) void { + this.result.deinit(); + this.store.deref(); + if (this.glob_pattern) |p| bun.default_allocator.free(p); + } +}; + +pub const FilesTask = AsyncTask(FilesContext); + +fn startFilesTask(globalThis: *jsc.JSGlobalObject, store: *jsc.WebCore.Blob.Store, glob_pattern: ?[]const u8) bun.JSError!jsc.JSValue { + store.ref(); + errdefer store.deref(); + errdefer if (glob_pattern) |p| bun.default_allocator.free(p); + + const task = try FilesTask.create(globalThis, .{ + .store = store, + .glob_pattern = glob_pattern, + }); + + const promise_js = task.promise.value(); + task.schedule(); + return promise_js; +} + +// ============================================================================ +// Helpers +// ============================================================================ + +fn compressGzip(data: []const u8) ![]u8 { + libdeflate.load(); + + const compressor = libdeflate.Compressor.alloc(6) orelse return error.GzipInitFailed; + defer compressor.deinit(); + + const max_size = compressor.maxBytesNeeded(data, .gzip); + + // Use stack buffer for small data, heap for large + const stack_threshold = 256 * 1024; + var stack_buf: [stack_threshold]u8 = undefined; + + if (max_size <= stack_threshold) { + const result = compressor.gzip(data, &stack_buf); + if (result.status != .success) return error.GzipCompressFailed; + return bun.default_allocator.dupe(u8, stack_buf[0..result.written]); + } + + const output = try bun.default_allocator.alloc(u8, max_size); + errdefer bun.default_allocator.free(output); + + const result = compressor.gzip(data, output); + if (result.status != .success) return error.GzipCompressFailed; + + return bun.default_allocator.realloc(output, result.written) catch output[0..result.written]; +} + +const libarchive = @import("../../libarchive/libarchive.zig"); +const libdeflate = @import("../../deps/libdeflate.zig"); +const std = @import("std"); + +const bun = @import("bun"); +const jsc = bun.jsc; diff --git a/src/bun.js/api/BunObject.zig b/src/bun.js/api/BunObject.zig index 53322882a2..a85472f191 100644 --- a/src/bun.js/api/BunObject.zig +++ b/src/bun.js/api/BunObject.zig @@ -49,6 +49,7 @@ pub const BunObject = struct { // 
--- Callbacks --- // --- Lazy property callbacks --- + pub const Archive = toJSLazyPropertyCallback(Bun.getArchiveConstructor); pub const CryptoHasher = toJSLazyPropertyCallback(Crypto.CryptoHasher.getter); pub const CSRF = toJSLazyPropertyCallback(Bun.getCSRFObject); pub const FFI = toJSLazyPropertyCallback(Bun.FFIObject.getter); @@ -116,6 +117,7 @@ pub const BunObject = struct { } // --- Lazy property callbacks --- + @export(&BunObject.Archive, .{ .name = lazyPropertyCallbackName("Archive") }); @export(&BunObject.CryptoHasher, .{ .name = lazyPropertyCallbackName("CryptoHasher") }); @export(&BunObject.CSRF, .{ .name = lazyPropertyCallbackName("CSRF") }); @export(&BunObject.FFI, .{ .name = lazyPropertyCallbackName("FFI") }); @@ -1273,6 +1275,10 @@ pub fn getYAMLObject(globalThis: *jsc.JSGlobalObject, _: *jsc.JSObject) jsc.JSVa return YAMLObject.create(globalThis); } +pub fn getArchiveConstructor(globalThis: *jsc.JSGlobalObject, _: *jsc.JSObject) jsc.JSValue { + return jsc.API.Archive.js.getConstructor(globalThis); +} + pub fn getGlobConstructor(globalThis: *jsc.JSGlobalObject, _: *jsc.JSObject) jsc.JSValue { return jsc.API.Glob.js.getConstructor(globalThis); } diff --git a/src/bun.js/bindings/BunObject+exports.h b/src/bun.js/bindings/BunObject+exports.h index d2fc9d55ed..c366d7fb43 100644 --- a/src/bun.js/bindings/BunObject+exports.h +++ b/src/bun.js/bindings/BunObject+exports.h @@ -3,6 +3,7 @@ // --- Getters --- #define FOR_EACH_GETTER(macro) \ + macro(Archive) \ macro(CSRF) \ macro(CryptoHasher) \ macro(FFI) \ diff --git a/src/bun.js/bindings/BunObject.cpp b/src/bun.js/bindings/BunObject.cpp index 5400dc906a..bd6a34802a 100644 --- a/src/bun.js/bindings/BunObject.cpp +++ b/src/bun.js/bindings/BunObject.cpp @@ -712,6 +712,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj /* Source for BunObject.lut.h @begin bunObjectTable $ constructBunShell DontDelete|PropertyCallback + Archive BunObject_lazyPropCb_wrap_Archive DontDelete|PropertyCallback ArrayBufferSink BunObject_lazyPropCb_wrap_ArrayBufferSink DontDelete|PropertyCallback Cookie constructCookieObject DontDelete|ReadOnly|PropertyCallback CookieMap constructCookieMapObject DontDelete|ReadOnly|PropertyCallback diff --git a/src/bun.js/bindings/generated_classes_list.zig b/src/bun.js/bindings/generated_classes_list.zig index 86c9347868..84e1839b67 100644 --- a/src/bun.js/bindings/generated_classes_list.zig +++ b/src/bun.js/bindings/generated_classes_list.zig @@ -1,4 +1,5 @@ pub const Classes = struct { + pub const Archive = api.Archive; pub const Blob = webcore.Blob; pub const HTMLRewriter = api.HTMLRewriter.HTMLRewriter; pub const Element = api.HTMLRewriter.Element; diff --git a/src/bun.js/event_loop/Task.zig b/src/bun.js/event_loop/Task.zig index b27b500eb7..bb432256eb 100644 --- a/src/bun.js/event_loop/Task.zig +++ b/src/bun.js/event_loop/Task.zig @@ -5,6 +5,10 @@ pub const Task = TaggedPointerUnion(.{ Access, AnyTask, AppendFile, + ArchiveExtractTask, + ArchiveBlobTask, + ArchiveWriteTask, + ArchiveFilesTask, AsyncGlobWalkTask, AsyncTransformTask, bun.bake.DevServer.HotReloadEvent, @@ -133,6 +137,22 @@ pub fn tickQueueWithCount(this: *EventLoop, virtual_machine: *VirtualMachine, co log("run {s}", .{@tagName(task.tag())}); defer counter.* += 1; switch (task.tag()) { + @field(Task.Tag, @typeName(ArchiveExtractTask)) => { + var archive_task: *ArchiveExtractTask = task.get(ArchiveExtractTask).?; + try archive_task.runFromJS(); + }, + @field(Task.Tag, @typeName(ArchiveBlobTask)) => { + var archive_task: 
*ArchiveBlobTask = task.get(ArchiveBlobTask).?; + try archive_task.runFromJS(); + }, + @field(Task.Tag, @typeName(ArchiveWriteTask)) => { + var archive_task: *ArchiveWriteTask = task.get(ArchiveWriteTask).?; + try archive_task.runFromJS(); + }, + @field(Task.Tag, @typeName(ArchiveFilesTask)) => { + var archive_task: *ArchiveFilesTask = task.get(ArchiveFilesTask).?; + try archive_task.runFromJS(); + }, @field(Task.Tag, @typeName(ShellAsync)) => { var shell_ls_task: *ShellAsync = task.get(ShellAsync).?; shell_ls_task.runFromMainThread(); @@ -616,6 +636,11 @@ const NativeZstd = jsc.API.NativeZstd; const AsyncGlobWalkTask = jsc.API.Glob.WalkTask.AsyncGlobWalkTask; const AsyncTransformTask = jsc.API.JSTranspiler.TransformTask.AsyncTransformTask; +const ArchiveBlobTask = jsc.API.Archive.BlobTask; +const ArchiveExtractTask = jsc.API.Archive.ExtractTask; +const ArchiveFilesTask = jsc.API.Archive.FilesTask; +const ArchiveWriteTask = jsc.API.Archive.WriteTask; + const Timer = jsc.API.Timer; const ImmediateObject = Timer.ImmediateObject; const TimeoutObject = Timer.TimeoutObject; diff --git a/src/libarchive/libarchive-bindings.zig b/src/libarchive/libarchive-bindings.zig index cac5fd0628..32cdbc8142 100644 --- a/src/libarchive/libarchive-bindings.zig +++ b/src/libarchive/libarchive-bindings.zig @@ -1,11 +1,11 @@ const wchar_t = u16; // Match libarchive's platform-specific type definitions -const la_int64_t = i64; -const la_ssize_t = isize; +pub const la_int64_t = i64; +pub const la_ssize_t = isize; -const struct_archive = opaque {}; -const struct_archive_entry = opaque {}; +pub const struct_archive = opaque {}; +pub const struct_archive_entry = opaque {}; // const time_t = @import("std").c.time_t; pub const FileType = enum(mode_t) { @@ -658,12 +658,123 @@ pub const Archive = opaque { } extern fn archive_read_data(*Archive, ?*anyopaque, usize) isize; + + pub const Block = struct { + bytes: []const u8 = "", + offset: i64, + result: Result, + }; + pub fn next(archive: *Archive, offset: *i64) ?Block { + var buff: *const anyopaque = undefined; + var size: usize = 0; + const r = archive_read_data_block(@ptrCast(archive), @ptrCast(&buff), &size, offset); + if (r == Result.eof) return null; + if (r != Result.ok) return .{ .offset = offset.*, .result = r }; + const ptr: [*]const u8 = @ptrCast(buff); + return .{ .bytes = ptr[0..size], .offset = offset.*, .result = r }; + } + pub fn readData(archive: *Archive, buf: []u8) isize { return archive_read_data(archive, buf.ptr, buf.len); } extern fn archive_read_data_into_fd(*Archive, fd: c_int) Result; - pub fn readDataIntoFd(archive: *Archive, fd: c_int) Result { - return archive_read_data_into_fd(archive, fd); + fn writeZerosToFile(file: bun.sys.File, count: usize) Result { + // Use undefined + memset instead of comptime zero-init to reduce binary size + var zero_buf: [16 * 1024]u8 = undefined; + @memset(&zero_buf, 0); + var remaining = count; + while (remaining > 0) { + const to_write = zero_buf[0..@min(remaining, zero_buf.len)]; + switch (file.writeAll(to_write)) { + .err => return Result.failed, + .result => {}, + } + remaining -= to_write.len; + } + return Result.ok; + } + + /// Reads data from the archive and writes it to the given file descriptor. 
+ /// This is a port of libarchive's archive_read_data_into_fd with optimizations: + /// - Uses pwrite when possible to avoid needing lseek for sparse file handling + /// - Falls back to lseek + write if pwrite is not available + /// - Falls back to writing zeros if lseek is not available + /// - Truncates the file to the final size to handle trailing sparse holes + pub fn readDataIntoFd(archive: *Archive, fd: bun.FileDescriptor, can_use_pwrite: *bool, can_use_lseek: *bool) Result { + var target_offset: i64 = 0; // Updated by archive.next() - where this block should be written + var actual_offset: i64 = 0; // Where we've actually written to (for write() path) + var final_offset: i64 = 0; // Track the furthest point we need the file to extend to + const file = bun.sys.File{ .handle = fd }; + + while (archive.next(&target_offset)) |block| { + if (block.result != Result.ok) { + return block.result; + } + const data = block.bytes; + + // Track the furthest point we need to write to (for final truncation) + final_offset = @max(final_offset, block.offset + @as(i64, @intCast(data.len))); + + if (comptime bun.Environment.isPosix) { + // Try pwrite first - it handles sparse files without needing lseek + if (can_use_pwrite.*) { + switch (file.pwriteAll(data, block.offset)) { + .err => { + can_use_pwrite.* = false; + bun.Output.debugWarn("libarchive: falling back to write() after pwrite() failure", .{}); + // Fall through to lseek+write path + }, + .result => { + // pwrite doesn't update file position, but track logical position for fallback + actual_offset = @max(actual_offset, block.offset + @as(i64, @intCast(data.len))); + continue; + }, + } + } + } + + // Handle mismatch between actual position and target position + if (block.offset != actual_offset) seek: { + if (can_use_lseek.*) { + switch (bun.sys.setFileOffset(fd, @intCast(block.offset))) { + .err => can_use_lseek.* = false, + .result => { + actual_offset = block.offset; + break :seek; + }, + } + } + + // lseek failed or not available + if (block.offset > actual_offset) { + // Write zeros to fill the gap + const zero_count: usize = @intCast(block.offset - actual_offset); + const zero_result = writeZerosToFile(file, zero_count); + if (zero_result != Result.ok) { + return zero_result; + } + actual_offset = block.offset; + } else { + // Can't seek backward without lseek + return Result.failed; + } + } + + switch (file.writeAll(data)) { + .err => return Result.failed, + .result => { + actual_offset += @intCast(data.len); + }, + } + } + + // Handle trailing sparse hole by truncating file to final size + // This extends the file to include any trailing zeros without actually writing them + if (final_offset > actual_offset) { + _ = bun.sys.ftruncate(fd, final_offset); + } + + return Result.ok; } extern fn archive_read_support_filter_all(*Archive) Result; @@ -821,6 +932,9 @@ pub const Archive = opaque { pub fn size(entry: *Entry) i64 { return archive_entry_size(entry); } + pub fn mtime(entry: *Entry) i64 { + return @intCast(archive_entry_mtime(@ptrCast(entry))); + } extern fn archive_entry_symlink(*Entry) [*c]const u8; pub fn symlink(entry: *Entry) [:0]const u8 { return bun.sliceTo(archive_entry_symlink(entry), 0); @@ -1010,7 +1124,7 @@ pub extern fn archive_read_header_position(*struct_archive) la_int64_t; pub extern fn archive_read_has_encrypted_entries(*struct_archive) c_int; pub extern fn archive_read_format_capabilities(*struct_archive) c_int; pub extern fn archive_seek_data(*struct_archive, la_int64_t, c_int) la_int64_t; -pub extern fn 
archive_read_data_block(a: *struct_archive, buff: [*c]*const anyopaque, size: [*c]usize, offset: [*c]la_int64_t) c_int; +pub extern fn archive_read_data_block(a: *struct_archive, buff: [*c]*const anyopaque, size: [*c]usize, offset: [*c]la_int64_t) Archive.Result; pub extern fn archive_read_data_skip(*struct_archive) c_int; pub extern fn archive_read_set_format_option(_a: *struct_archive, m: [*c]const u8, o: [*c]const u8, v: [*c]const u8) c_int; pub extern fn archive_read_set_filter_option(_a: *struct_archive, m: [*c]const u8, o: [*c]const u8, v: [*c]const u8) c_int; @@ -1340,6 +1454,48 @@ pub const ARCHIVE_ENTRY_ACL_STYLE_COMPACT = @as(c_int, 0x00000010); pub const OLD_ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID = @as(c_int, 1024); pub const OLD_ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT = @as(c_int, 2048); +/// Growing memory buffer for archive writes with libarchive callbacks +pub const GrowingBuffer = struct { + list: std.ArrayListUnmanaged(u8) = .empty, + allocator: std.mem.Allocator, + had_error: bool = false, + + pub fn init(allocator: std.mem.Allocator) GrowingBuffer { + return .{ .allocator = allocator }; + } + + pub fn deinit(self: *GrowingBuffer) void { + self.list.deinit(self.allocator); + } + + pub fn toOwnedSlice(self: *GrowingBuffer) error{OutOfMemory}![]u8 { + if (self.had_error) return error.OutOfMemory; + return self.list.toOwnedSlice(self.allocator); + } + + pub fn openCallback(_: *struct_archive, client_data: *anyopaque) callconv(.c) c_int { + const self: *GrowingBuffer = @ptrCast(@alignCast(client_data)); + self.list.clearRetainingCapacity(); + self.had_error = false; + return 0; + } + + pub fn writeCallback(_: *struct_archive, client_data: *anyopaque, buff: ?*const anyopaque, length: usize) callconv(.c) la_ssize_t { + const self: *GrowingBuffer = @ptrCast(@alignCast(client_data)); + if (buff == null or length == 0) return 0; + const data: [*]const u8 = @ptrCast(buff.?); + self.list.appendSlice(self.allocator, data[0..length]) catch { + self.had_error = true; + return -1; + }; + return @intCast(length); + } + + pub fn closeCallback(_: *struct_archive, _: *anyopaque) callconv(.c) c_int { + return 0; + } +}; + const std = @import("std"); const bun = @import("bun"); diff --git a/src/libarchive/libarchive.zig b/src/libarchive/libarchive.zig index 8291f9a012..a9ffd6c6d6 100644 --- a/src/libarchive/libarchive.zig +++ b/src/libarchive/libarchive.zig @@ -29,10 +29,7 @@ pub const BufferReadStream = struct { pub fn deinit(this: *BufferReadStream) void { _ = this.archive.readClose(); - // don't free it if we never actually read it - // if (this.reading) { - // _ = lib.archive_read_free(this.archive); - // } + _ = this.archive.readFree(); } pub fn openRead(this: *BufferReadStream) Archive.Result { @@ -319,6 +316,8 @@ pub const Archiver = struct { const dir_fd = dir.fd; var normalized_buf: bun.OSPathBuffer = undefined; + var use_pwrite = Environment.isPosix; + var use_lseek = true; loop: while (true) { const r = archive.readNextHeader(&entry); @@ -510,6 +509,7 @@ pub const Archiver = struct { }; const size: usize = @intCast(@max(entry.size(), 0)); + if (size > 0) { if (ctx) |ctx_| { const hash: u64 = if (ctx_.pluckers.len > 0) @@ -550,8 +550,9 @@ pub const Archiver = struct { } var retries_remaining: u8 = 5; + possibly_retry: while (retries_remaining != 0) : (retries_remaining -= 1) { - switch (archive.readDataIntoFd(file_handle.uv())) { + switch (archive.readDataIntoFd(file_handle, &use_pwrite, &use_lseek)) { .eof => break :loop, .ok => break :possibly_retry, .retry => { diff --git a/src/sys.zig 
b/src/sys.zig index 4bf88c0f3d..0c0d23f3de 100644 --- a/src/sys.zig +++ b/src/sys.zig @@ -2146,13 +2146,13 @@ pub fn pidfd_open(pid: std.os.linux.pid_t, flags: u32) Maybe(i32) { pub fn lseek(fd: bun.FileDescriptor, offset: i64, whence: usize) Maybe(usize) { while (true) { - const rc = syscall.lseek(fd.cast(), offset, whence); + const rc = syscall.lseek(fd.cast(), offset, @intCast(whence)); if (Maybe(usize).errnoSysFd(rc, .lseek, fd)) |err| { if (err.getErrno() == .INTR) continue; return err; } - return Maybe(usize){ .result = rc }; + return Maybe(usize){ .result = @intCast(rc) }; } } diff --git a/src/sys/File.zig b/src/sys/File.zig index 17a57da3b9..248a485768 100644 --- a/src/sys/File.zig +++ b/src/sys/File.zig @@ -95,6 +95,26 @@ pub fn readAll(self: File, buf: []u8) Maybe(usize) { return sys.readAll(self.handle, buf); } +pub fn pwriteAll(self: File, buf: []const u8, initial_offset: i64) Maybe(void) { + var remain = buf; + var offset = initial_offset; + while (remain.len > 0) { + const rc = sys.pwrite(self.handle, remain, offset); + switch (rc) { + .err => |err| return .{ .err = err }, + .result => |amt| { + if (amt == 0) { + return .success; + } + remain = remain[amt..]; + offset += @intCast(amt); + }, + } + } + + return .success; +} + pub fn writeAll(self: File, buf: []const u8) Maybe(void) { var remain = buf; while (remain.len > 0) { diff --git a/test/js/bun/archive-extract-leak-repro.ts b/test/js/bun/archive-extract-leak-repro.ts new file mode 100644 index 0000000000..acdd63c2a9 --- /dev/null +++ b/test/js/bun/archive-extract-leak-repro.ts @@ -0,0 +1,33 @@ +// Minimal reproduction of memory leak in Bun.Archive.extract() +// Run with: bun run test/js/bun/archive-extract-leak-repro.ts + +import { mkdtempSync, rmSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; + +const dir = mkdtempSync(join(tmpdir(), "archive-leak-")); + +const files = { + "a.txt": "hello", + "b.txt": "world", +}; + +const archive = Bun.Archive.from(files); + +function formatMB(bytes: number) { + return (bytes / 1024 / 1024).toFixed(0) + " MB"; +} + +console.log("Extracting archive 10,000 times per round...\n"); + +for (let round = 0; round < 20; round++) { + for (let i = 0; i < 10_000; i++) { + await archive.extract(dir); + } + + Bun.gc(true); + const rss = process.memoryUsage.rss(); + console.log(`Round ${round + 1}: RSS = ${formatMB(rss)}`); +} + +rmSync(dir, { recursive: true }); diff --git a/test/js/bun/archive.test.ts b/test/js/bun/archive.test.ts new file mode 100644 index 0000000000..cc99f0c0f6 --- /dev/null +++ b/test/js/bun/archive.test.ts @@ -0,0 +1,1211 @@ +import { describe, expect, test } from "bun:test"; +import { tempDir } from "harness"; +import { join } from "path"; + +describe("Bun.Archive", () => { + describe("Archive.from", () => { + test("creates archive from object with string values", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + "data.json": JSON.stringify({ foo: "bar" }), + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from object with Blob values", async () => { + const archive = Bun.Archive.from({ + "blob1.txt": new Blob(["Hello from Blob"]), + "blob2.txt": new Blob(["Another Blob"]), + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from object with Uint8Array values", async () => { + const encoder = new TextEncoder(); + const archive = Bun.Archive.from({ + "bytes1.txt": encoder.encode("Hello from Uint8Array"), + "bytes2.txt": encoder.encode("Another 
Uint8Array"), + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from object with ArrayBuffer values", async () => { + const encoder = new TextEncoder(); + const archive = Bun.Archive.from({ + "buffer1.txt": encoder.encode("Hello from ArrayBuffer").buffer, + "buffer2.txt": encoder.encode("Another ArrayBuffer").buffer, + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from object with mixed value types", async () => { + const encoder = new TextEncoder(); + const archive = Bun.Archive.from({ + "string.txt": "String content", + "blob.txt": new Blob(["Blob content"]), + "uint8.txt": encoder.encode("Uint8Array content"), + "buffer.txt": encoder.encode("ArrayBuffer content").buffer, + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from Blob", async () => { + // First create an archive with some content + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + + const blob = await sourceArchive.blob(); + expect(blob).toBeInstanceOf(Blob); + + // Create new archive from the blob + const archive = Bun.Archive.from(blob); + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from ArrayBuffer", async () => { + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + + const bytes = await sourceArchive.bytes(); + const buffer = bytes.buffer; + + const archive = Bun.Archive.from(buffer); + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive from Uint8Array", async () => { + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + + const bytes = await sourceArchive.bytes(); + + const archive = Bun.Archive.from(bytes); + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive with nested directory structure", async () => { + const archive = Bun.Archive.from({ + "root.txt": "Root file", + "dir1/file1.txt": "File in dir1", + "dir1/dir2/file2.txt": "File in dir1/dir2", + "dir1/dir2/dir3/file3.txt": "File in dir1/dir2/dir3", + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("creates archive with empty string value", async () => { + const archive = Bun.Archive.from({ + "empty.txt": "", + }); + + expect(archive).toBeInstanceOf(Bun.Archive); + }); + + test("throws with no arguments", () => { + expect(() => { + // @ts-expect-error - testing runtime behavior + Bun.Archive.from(); + }).toThrow(); + }); + + test("throws with invalid input type (number)", () => { + expect(() => { + // @ts-expect-error - testing runtime behavior + Bun.Archive.from(123); + }).toThrow(); + }); + + test("throws with invalid input type (null)", () => { + expect(() => { + // @ts-expect-error - testing runtime behavior + Bun.Archive.from(null); + }).toThrow(); + }); + + test("converts non-string/buffer values to strings", async () => { + // @ts-expect-error - testing runtime behavior + const archive = Bun.Archive.from({ "file.txt": 123 }); + // The archive should be created successfully - number is converted to string + expect(archive).toBeDefined(); + const bytes = await archive.bytes(); + // Should contain "123" somewhere in the tarball + expect(new TextDecoder().decode(bytes)).toContain("123"); + }); + }); + + describe("archive.blob()", () => { + test("returns a Blob", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const blob = await archive.blob(); + expect(blob).toBeInstanceOf(Blob); + expect(blob.size).toBeGreaterThan(0); + 
}); + + test("returns consistent output for same input", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const blob1 = await archive.blob(); + const blob2 = await archive.blob(); + expect(blob1.size).toBe(blob2.size); + }); + + test("with gzip returns gzipped blob", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const regularBlob = await archive.blob(); + const gzippedBlob = await archive.blob("gzip"); + + expect(gzippedBlob).toBeInstanceOf(Blob); + // Gzipped should be different size + expect(gzippedBlob.size).not.toBe(regularBlob.size); + }); + + test("gzip is smaller for larger repetitive data", async () => { + const largeContent = Buffer.alloc(13000, "Hello, World!"); + const archive = Bun.Archive.from({ + "large.txt": largeContent, + }); + + const regularBlob = await archive.blob(); + const gzippedBlob = await archive.blob("gzip"); + + // For large repetitive data, gzip should be smaller + expect(gzippedBlob.size).toBeLessThan(regularBlob.size); + }); + + test("throws with invalid compress argument", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await archive.blob("invalid"); + }).toThrow(); + }); + }); + + describe("archive.bytes()", () => { + test("returns a Uint8Array", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const bytes = await archive.bytes(); + expect(bytes).toBeInstanceOf(Uint8Array); + expect(bytes.length).toBeGreaterThan(0); + }); + + test("returns consistent output for same input", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const bytes1 = await archive.bytes(); + const bytes2 = await archive.bytes(); + expect(bytes1.length).toBe(bytes2.length); + }); + + test("with gzip returns gzipped bytes", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const regularBytes = await archive.bytes(); + const gzippedBytes = await archive.bytes("gzip"); + + expect(gzippedBytes).toBeInstanceOf(Uint8Array); + // Gzipped should be different size + expect(gzippedBytes.length).not.toBe(regularBytes.length); + }); + + test("gzip is smaller for larger repetitive data", async () => { + const largeContent = Buffer.alloc(13000, "Hello, World!"); + const archive = Bun.Archive.from({ + "large.txt": largeContent, + }); + + const regularBytes = await archive.bytes(); + const gzippedBytes = await archive.bytes("gzip"); + + // For large repetitive data, gzip should be smaller + expect(gzippedBytes.length).toBeLessThan(regularBytes.length); + }); + + test("bytes match blob content", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + const bytes = await archive.bytes(); + const blob = await archive.blob(); + const blobBytes = new Uint8Array(await blob.arrayBuffer()); + + expect(bytes.length).toBe(blobBytes.length); + for (let i = 0; i < bytes.length; i++) { + expect(bytes[i]).toBe(blobBytes[i]); + } + }); + + test("throws with invalid compress argument", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await archive.bytes("deflate"); + }).toThrow(); + }); + }); + + describe("archive.extract()", () => { + test("extracts to directory and returns file count", async () => { + using dir = 
tempDir("archive-extract-test", {}); + + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + "subdir/nested.txt": "Nested content", + }); + + const count = await archive.extract(String(dir)); + expect(count).toBeGreaterThan(0); + + // Verify files were extracted + const helloContent = await Bun.file(join(String(dir), "hello.txt")).text(); + expect(helloContent).toBe("Hello, World!"); + }); + + test("extracts nested directory structure", async () => { + using dir = tempDir("archive-extract-nested", {}); + + const archive = Bun.Archive.from({ + "root.txt": "Root file", + "dir1/file1.txt": "File in dir1", + "dir1/dir2/file2.txt": "File in dir1/dir2", + "dir1/dir2/dir3/file3.txt": "File in dir1/dir2/dir3", + }); + + const count = await archive.extract(String(dir)); + expect(count).toBeGreaterThan(0); + + // Verify all files were extracted + expect(await Bun.file(join(String(dir), "root.txt")).text()).toBe("Root file"); + expect(await Bun.file(join(String(dir), "dir1/file1.txt")).text()).toBe("File in dir1"); + expect(await Bun.file(join(String(dir), "dir1/dir2/file2.txt")).text()).toBe("File in dir1/dir2"); + expect(await Bun.file(join(String(dir), "dir1/dir2/dir3/file3.txt")).text()).toBe("File in dir1/dir2/dir3"); + }); + + test("extracts binary data correctly", async () => { + using dir = tempDir("archive-extract-binary", {}); + + const binaryData = new Uint8Array([0, 1, 2, 255, 254, 253, 128, 127]); + const archive = Bun.Archive.from({ + "binary.bin": binaryData, + }); + + await archive.extract(String(dir)); + + const extractedBytes = new Uint8Array(await Bun.file(join(String(dir), "binary.bin")).arrayBuffer()); + expect(extractedBytes.length).toBe(binaryData.length); + for (let i = 0; i < binaryData.length; i++) { + expect(extractedBytes[i]).toBe(binaryData[i]); + } + }); + + test("extracts from archive created from blob", async () => { + using dir = tempDir("archive-extract-from-blob", {}); + + // Create original archive + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + + // Get as blob and create new archive + const blob = await sourceArchive.blob(); + const archive = Bun.Archive.from(blob); + + const count = await archive.extract(String(dir)); + expect(count).toBeGreaterThan(0); + + const content = await Bun.file(join(String(dir), "test.txt")).text(); + expect(content).toBe("test content"); + }); + + test("extracts from archive created from bytes", async () => { + using dir = tempDir("archive-extract-from-bytes", {}); + + // Create original archive + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + + // Get as bytes and create new archive + const bytes = await sourceArchive.bytes(); + const archive = Bun.Archive.from(bytes); + + const count = await archive.extract(String(dir)); + expect(count).toBeGreaterThan(0); + + const content = await Bun.file(join(String(dir), "test.txt")).text(); + expect(content).toBe("test content"); + }); + + test("throws with missing path argument", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await archive.extract(); + }).toThrow(); + }); + + test("throws with non-string path argument", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await archive.extract(123); + }).toThrow(); + }); + + test("creates directory if it doesn't exist", async 
() => { + using dir = tempDir("archive-extract-create-dir", {}); + const newDir = join(String(dir), "new-subdir", "nested"); + + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + // Should create the directory and extract successfully + const count = await archive.extract(newDir); + expect(count).toBeGreaterThan(0); + + const content = await Bun.file(join(newDir, "hello.txt")).text(); + expect(content).toBe("Hello, World!"); + }); + + test("throws when extracting to a file path instead of directory", async () => { + using dir = tempDir("archive-extract-to-file", { + "existing-file.txt": "I am a file", + }); + + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + // Try to extract to a file path instead of directory + await expect(async () => { + await archive.extract(join(String(dir), "existing-file.txt")); + }).toThrow(); + }); + }); + + describe("corrupted archives", () => { + test("throws when extracting corrupted archive data", async () => { + // Create garbage data that's not a valid archive + const corruptedData = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + const archive = Bun.Archive.from(corruptedData); + + using dir = tempDir("archive-corrupted", {}); + + await expect(async () => { + await archive.extract(String(dir)); + }).toThrow(); + }); + + test("throws when extracting truncated archive", async () => { + // Create a valid archive then truncate it + const validArchive = Bun.Archive.from({ + "file.txt": "Hello, World!", + }); + const bytes = await validArchive.bytes(); + + // Truncate to only first 10 bytes - definitely incomplete + const truncated = bytes.slice(0, 10); + const archive = Bun.Archive.from(truncated); + + using dir = tempDir("archive-truncated", {}); + + await expect(async () => { + await archive.extract(String(dir)); + }).toThrow(); + }); + + test("throws when extracting random bytes as archive", async () => { + // Generate random bytes + const randomBytes = new Uint8Array(1024); + for (let i = 0; i < randomBytes.length; i++) { + randomBytes[i] = Math.floor(Math.random() * 256); + } + + const archive = Bun.Archive.from(randomBytes); + + using dir = tempDir("archive-random", {}); + + await expect(async () => { + await archive.extract(String(dir)); + }).toThrow(); + }); + + test("handles empty archive gracefully", async () => { + // Empty data + const emptyData = new Uint8Array(0); + const archive = Bun.Archive.from(emptyData); + + using dir = tempDir("archive-empty", {}); + + // Should either throw or return 0 files extracted + try { + const count = await archive.extract(String(dir)); + expect(count).toBe(0); + } catch { + // Throwing is also acceptable for empty/invalid data + } + }); + }); + + describe("path safety", () => { + test("normalizes paths with redundant separators", async () => { + const archive = Bun.Archive.from({ + "dir//subdir///file.txt": "content", + }); + + using dir = tempDir("archive-path-normalize", {}); + await archive.extract(String(dir)); + + // The file should be extracted with normalized path + const content = await Bun.file(join(String(dir), "dir/subdir/file.txt")).text(); + expect(content).toBe("content"); + }); + + test("handles paths with dots correctly", async () => { + const archive = Bun.Archive.from({ + "dir/./file.txt": "content1", + "dir/subdir/../file2.txt": "content2", + }); + + using dir = tempDir("archive-path-dots", {}); + await archive.extract(String(dir)); + + // Paths should be normalized + expect(await Bun.file(join(String(dir), 
"dir/file.txt")).text()).toBe("content1"); + expect(await Bun.file(join(String(dir), "dir/file2.txt")).text()).toBe("content2"); + }); + + test("handles very long filenames", async () => { + // Create a filename that's quite long but within reasonable limits + const longName = "a".repeat(200) + ".txt"; + const archive = Bun.Archive.from({ + [longName]: "content", + }); + + using dir = tempDir("archive-long-name", {}); + + // Should either work or throw, but not crash + try { + await archive.extract(String(dir)); + const content = await Bun.file(join(String(dir), longName)).text(); + expect(content).toBe("content"); + } catch { + // Some filesystems don't support very long names - that's ok + } + }); + + test("handles deeply nested paths", async () => { + // Create a deeply nested path + const deepPath = Array(50).fill("dir").join("/") + "/file.txt"; + const archive = Bun.Archive.from({ + [deepPath]: "deep content", + }); + + using dir = tempDir("archive-deep-path", {}); + + // Should either work or throw, but not crash + try { + await archive.extract(String(dir)); + const content = await Bun.file(join(String(dir), deepPath)).text(); + expect(content).toBe("deep content"); + } catch { + // Very deep paths might fail on some systems - that's acceptable + } + }); + }); + + describe("Archive.write()", () => { + test("writes archive to file", async () => { + using dir = tempDir("archive-write-test", {}); + const archivePath = join(String(dir), "test.tar"); + + await Bun.Archive.write(archivePath, { + "hello.txt": "Hello, World!", + "data.json": JSON.stringify({ foo: "bar" }), + }); + + // Verify file exists + const file = Bun.file(archivePath); + expect(await file.exists()).toBe(true); + expect(file.size).toBeGreaterThan(0); + }); + + test("writes gzipped archive to file", async () => { + using dir = tempDir("archive-write-gzip-test", {}); + const archivePath = join(String(dir), "test.tar.gz"); + const largeContent = Buffer.alloc(1300, "Hello, World!"); + + await Bun.Archive.write( + archivePath, + { + "hello.txt": largeContent, + }, + "gzip", + ); + + // Verify file exists and is smaller than uncompressed + const file = Bun.file(archivePath); + expect(await file.exists()).toBe(true); + + // Compare with uncompressed + const uncompressedPath = join(String(dir), "test.tar"); + await Bun.Archive.write(uncompressedPath, { + "hello.txt": largeContent, + }); + + expect(file.size).toBeLessThan(Bun.file(uncompressedPath).size); + }); + + test("writes archive from Blob", async () => { + using dir = tempDir("archive-write-blob-test", {}); + const archivePath = join(String(dir), "test.tar"); + + // Create archive and get blob + const sourceArchive = Bun.Archive.from({ + "test.txt": "test content", + }); + const blob = await sourceArchive.blob(); + + // Write blob to file + await Bun.Archive.write(archivePath, blob); + + // Verify file exists + const file = Bun.file(archivePath); + expect(await file.exists()).toBe(true); + }); + + test("written archive can be extracted", async () => { + using dir = tempDir("archive-write-extract-test", {}); + const archivePath = join(String(dir), "test.tar"); + const extractDir = join(String(dir), "extracted"); + + // Write archive + await Bun.Archive.write(archivePath, { + "hello.txt": "Hello from write!", + "subdir/nested.txt": "Nested content from write", + }); + + // Extract it + const blob = await Bun.file(archivePath).bytes(); + const archive = Bun.Archive.from(blob); + require("fs").mkdirSync(extractDir, { recursive: true }); + const count = await 
archive.extract(extractDir); + expect(count).toBeGreaterThan(0); + + // Verify contents + expect(await Bun.file(join(extractDir, "hello.txt")).text()).toBe("Hello from write!"); + expect(await Bun.file(join(extractDir, "subdir/nested.txt")).text()).toBe("Nested content from write"); + }); + + test("throws with missing arguments", async () => { + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await Bun.Archive.write(); + }).toThrow(); + }); + + test("throws with only path argument", async () => { + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await Bun.Archive.write("/tmp/test.tar"); + }).toThrow(); + }); + + test("throws with invalid compress argument", async () => { + using dir = tempDir("archive-write-invalid-compress", {}); + const archivePath = join(String(dir), "test.tar"); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await Bun.Archive.write(archivePath, { "file.txt": "content" }, "invalid"); + }).toThrow(); + }); + }); + + describe("new Archive()", () => { + test("throws when constructed directly", () => { + expect(() => { + // @ts-expect-error - testing runtime behavior + new Bun.Archive(); + }).toThrow("Archive cannot be constructed directly"); + }); + }); + + describe("GC safety", () => { + test("archive remains valid after GC", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + }); + + // Force GC + Bun.gc(true); + + // Archive should still work + const blob = await archive.blob(); + expect(blob).toBeInstanceOf(Blob); + expect(blob.size).toBeGreaterThan(0); + }); + + test("archive with many entries survives GC", async () => { + const entries: Record<string, string> = {}; + for (let i = 0; i < 100; i++) { + entries[`file${i}.txt`] = `Content for file ${i}`; + } + + const archive = Bun.Archive.from(entries); + + // Force GC multiple times + Bun.gc(true); + Bun.gc(true); + Bun.gc(true); + + // Archive should still work + const bytes = await archive.bytes(); + expect(bytes).toBeInstanceOf(Uint8Array); + expect(bytes.length).toBeGreaterThan(0); + }); + + test("original data mutation doesn't affect archive", async () => { + const data = new Uint8Array([1, 2, 3, 4, 5]); + const archive = Bun.Archive.from({ + "data.bin": data, + }); + + // Mutate original data + data[0] = 255; + data[1] = 255; + + // Force GC + Bun.gc(true); + + // Extract and verify original data is preserved + using dir = tempDir("archive-gc-mutation", {}); + await archive.extract(String(dir)); + + const extractedBytes = new Uint8Array(await Bun.file(join(String(dir), "data.bin")).arrayBuffer()); + expect(extractedBytes[0]).toBe(1); // Original value, not mutated + expect(extractedBytes[1]).toBe(2); // Original value, not mutated + }); + + test("blob source mutation doesn't affect archive", async () => { + const original = new Uint8Array([1, 2, 3, 4, 5]); + const blob = new Blob([original]); + const sourceArchive = Bun.Archive.from({ + "data.bin": blob, + }); + + const archiveBlob = await sourceArchive.blob(); + const archive = Bun.Archive.from(archiveBlob); + + // Force GC + Bun.gc(true); + + // Mutate original + original[0] = 255; + + // Extract and verify + using dir = tempDir("archive-gc-blob-mutation", {}); + await archive.extract(String(dir)); + + const extractedBytes = new Uint8Array(await Bun.file(join(String(dir), "data.bin")).arrayBuffer()); + expect(extractedBytes[0]).toBe(1); // Original value + }); + + test("async operations work even if archive is not referenced", async () => { + // This
tests that tasks copy data instead of holding Archive reference + // If the implementation held a reference to Archive, GC could finalize it + // and cause use-after-free + + using dir = tempDir("archive-gc-no-ref", {}); + + // Create promise without keeping archive reference + const promise = Bun.Archive.from({ + "test.txt": "Hello from GC test!", + }).extract(String(dir)); + + // Force aggressive GC - the archive object is now unreferenced + Bun.gc(true); + Bun.gc(true); + + // The promise should still resolve correctly + const count = await promise; + expect(count).toBeGreaterThan(0); + + // Verify the file was extracted correctly + const content = await Bun.file(join(String(dir), "test.txt")).text(); + expect(content).toBe("Hello from GC test!"); + }); + + test("blob() works even if archive is not referenced", async () => { + // Get blob promise without keeping archive reference + const promise = Bun.Archive.from({ + "file.txt": "Blob GC test content", + }).blob(); + + // Force aggressive GC + Bun.gc(true); + Bun.gc(true); + + const blob = await promise; + expect(blob).toBeInstanceOf(Blob); + expect(blob.size).toBeGreaterThan(0); + }); + + test("bytes() works even if archive is not referenced", async () => { + // Get bytes promise without keeping archive reference + const promise = Bun.Archive.from({ + "file.txt": "Bytes GC test content", + }).bytes(); + + // Force aggressive GC + Bun.gc(true); + Bun.gc(true); + + const bytes = await promise; + expect(bytes).toBeInstanceOf(Uint8Array); + expect(bytes.length).toBeGreaterThan(0); + }); + }); + + describe("large archives", () => { + test("handles large file content", async () => { + const largeContent = Buffer.alloc(1024 * 1024, "x"); // 1MB + const archive = Bun.Archive.from({ + "large.txt": largeContent, + }); + + using dir = tempDir("archive-large", {}); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "large.txt")).arrayBuffer(); + expect(extracted.byteLength).toBe(largeContent.length); + }); + + test("handles many files", async () => { + const entries: Record<string, string> = {}; + for (let i = 0; i < 500; i++) { + entries[`file${i.toString().padStart(4, "0")}.txt`] = `Content ${i}`; + } + + const archive = Bun.Archive.from(entries); + + using dir = tempDir("archive-many-files", {}); + const count = await archive.extract(String(dir)); + expect(count).toBeGreaterThanOrEqual(500); + }); + }); + + describe("special characters", () => { + test("handles filenames with spaces", async () => { + const archive = Bun.Archive.from({ + "file with spaces.txt": "content", + }); + + using dir = tempDir("archive-spaces", {}); + await archive.extract(String(dir)); + + const content = await Bun.file(join(String(dir), "file with spaces.txt")).text(); + expect(content).toBe("content"); + }); + + test("handles special characters in filenames", async () => { + // Note: Some unicode characters may not be supported by all tar formats + // Using ASCII-only special characters + const archive = Bun.Archive.from({ + "file-with-dash.txt": "content1", + "file_with_underscore.txt": "content2", + "file.with.dots.txt": "content3", + }); + + using dir = tempDir("archive-special-chars", {}); + await archive.extract(String(dir)); + + expect(await Bun.file(join(String(dir), "file-with-dash.txt")).text()).toBe("content1"); + expect(await Bun.file(join(String(dir), "file_with_underscore.txt")).text()).toBe("content2"); + expect(await Bun.file(join(String(dir), "file.with.dots.txt")).text()).toBe("content3"); + }); + + test("handles unicode
content", async () => { + const archive = Bun.Archive.from({ + "unicode.txt": "Hello, 世界! Привет! Γειά σου!", + }); + + using dir = tempDir("archive-unicode-content", {}); + await archive.extract(String(dir)); + + const content = await Bun.file(join(String(dir), "unicode.txt")).text(); + expect(content).toBe("Hello, 世界! Привет! Γειά σου!"); + }); + }); + + describe("archive.files()", () => { + test("returns a Map of File objects", async () => { + const archive = Bun.Archive.from({ + "hello.txt": "Hello, World!", + "data.json": JSON.stringify({ foo: "bar" }), + }); + + const files = await archive.files(); + expect(files).toBeInstanceOf(Map); + expect(files.size).toBe(2); + + const helloFile = files.get("hello.txt"); + expect(helloFile).toBeInstanceOf(File); + expect(helloFile!.name).toBe("hello.txt"); + expect(await helloFile!.text()).toBe("Hello, World!"); + + const dataFile = files.get("data.json"); + expect(dataFile).toBeInstanceOf(File); + expect(dataFile!.name).toBe("data.json"); + expect(await dataFile!.text()).toBe(JSON.stringify({ foo: "bar" })); + }); + + test("returns empty Map for empty archive", async () => { + const archive = Bun.Archive.from({}); + const files = await archive.files(); + expect(files).toBeInstanceOf(Map); + expect(files.size).toBe(0); + }); + + test("handles nested directory structure", async () => { + const archive = Bun.Archive.from({ + "root.txt": "Root file", + "dir1/file1.txt": "File in dir1", + "dir1/dir2/file2.txt": "File in dir1/dir2", + }); + + const files = await archive.files(); + expect(files.size).toBe(3); + + expect(files.get("root.txt")!.name).toBe("root.txt"); + expect(files.get("dir1/file1.txt")!.name).toBe("dir1/file1.txt"); + expect(files.get("dir1/dir2/file2.txt")!.name).toBe("dir1/dir2/file2.txt"); + }); + + test("filters files with glob pattern", async () => { + const archive = Bun.Archive.from({ + "file1.txt": "Text file 1", + "file2.txt": "Text file 2", + "file1.json": "JSON file 1", + "subdir/file3.txt": "Text file 3", + }); + + const txtFiles = await archive.files("*.txt"); + expect(txtFiles.size).toBe(2); + expect(txtFiles.has("file1.txt")).toBe(true); + expect(txtFiles.has("file2.txt")).toBe(true); + expect(txtFiles.has("file1.json")).toBe(false); + expect(txtFiles.has("subdir/file3.txt")).toBe(false); + }); + + test("filters with ** glob pattern", async () => { + const archive = Bun.Archive.from({ + "file1.txt": "Text file 1", + "subdir/file2.txt": "Text file 2", + "subdir/deep/file3.txt": "Text file 3", + "other.json": "JSON file", + }); + + // **/*.txt matches all .txt files including at root level (** can match zero segments) + const allTxtFiles = await archive.files("**/*.txt"); + expect(allTxtFiles.size).toBe(3); + expect(allTxtFiles.has("file1.txt")).toBe(true); + expect(allTxtFiles.has("subdir/file2.txt")).toBe(true); + expect(allTxtFiles.has("subdir/deep/file3.txt")).toBe(true); + }); + + test("filters with directory pattern", async () => { + const archive = Bun.Archive.from({ + "src/index.js": "source 1", + "src/util.js": "source 2", + "test/index.test.js": "test 1", + "package.json": "{}", + }); + + const srcFiles = await archive.files("src/*"); + expect(srcFiles.size).toBe(2); + expect(srcFiles.has("src/index.js")).toBe(true); + expect(srcFiles.has("src/util.js")).toBe(true); + }); + + test("returns empty Map when no files match glob", async () => { + const archive = Bun.Archive.from({ + "file1.txt": "Text file", + "file2.json": "JSON file", + }); + + const xmlFiles = await archive.files("*.xml"); + 
expect(xmlFiles).toBeInstanceOf(Map); + expect(xmlFiles.size).toBe(0); + }); + + test("handles binary data correctly", async () => { + const binaryData = new Uint8Array([0, 1, 2, 255, 254, 253, 128, 127]); + const archive = Bun.Archive.from({ + "binary.bin": binaryData, + }); + + const files = await archive.files(); + const binaryFile = files.get("binary.bin"); + expect(binaryFile).toBeInstanceOf(File); + + const extractedBytes = new Uint8Array(await binaryFile!.arrayBuffer()); + expect(extractedBytes.length).toBe(binaryData.length); + for (let i = 0; i < binaryData.length; i++) { + expect(extractedBytes[i]).toBe(binaryData[i]); + } + }); + + test("File objects have lastModified property", async () => { + // Tar archives store mtime in seconds, so round down to nearest second + const beforeTime = Math.floor(Date.now() / 1000) * 1000; + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + const files = await archive.files(); + const file = files.get("file.txt"); + const afterTime = Date.now() + 1000; // Add 1 second for rounding tolerance + + expect(file!.lastModified).toBeGreaterThanOrEqual(beforeTime); + expect(file!.lastModified).toBeLessThanOrEqual(afterTime); + }); + + test("throws with non-string glob argument", async () => { + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + await expect(async () => { + // @ts-expect-error - testing runtime behavior + await archive.files(123); + }).toThrow(); + }); + + test("works with gzipped archive source", async () => { + const sourceArchive = Bun.Archive.from({ + "hello.txt": "Hello from gzip!", + }); + + const gzippedBlob = await sourceArchive.blob("gzip"); + const archive = Bun.Archive.from(gzippedBlob); + + const files = await archive.files(); + expect(files.size).toBe(1); + expect(await files.get("hello.txt")!.text()).toBe("Hello from gzip!"); + }); + + test("concurrent files() operations work correctly", async () => { + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + const [files1, files2, files3] = await Promise.all([archive.files(), archive.files(), archive.files()]); + + expect(files1.size).toBe(1); + expect(files2.size).toBe(1); + expect(files3.size).toBe(1); + }); + + test("files() works even if archive is not referenced (GC safety)", async () => { + const promise = Bun.Archive.from({ + "test.txt": "GC test content", + }).files(); + + Bun.gc(true); + Bun.gc(true); + + const files = await promise; + expect(files).toBeInstanceOf(Map); + expect(files.size).toBe(1); + expect(await files.get("test.txt")!.text()).toBe("GC test content"); + }); + }); + + describe("sparse files", () => { + // These test sparse tar files created with GNU tar --sparse + // They exercise the pwrite/lseek/writeZeros code paths in readDataIntoFd + const fixturesDir = join(import.meta.dir, "fixtures", "sparse-tars"); + + test("extracts sparse file with small hole (< 1 tar block)", async () => { + using dir = tempDir("sparse-small", {}); + + const tarData = await Bun.file(join(fixturesDir, "small-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "small-hole.bin")).bytes(); + + // File structure: 64 bytes 'A', 256 bytes hole, 64 bytes 'B' + expect(extracted.length).toBe(384); + expect(extracted.slice(0, 64)).toEqual(new Uint8Array(64).fill(0x41)); + expect(extracted.slice(64, 320)).toEqual(new Uint8Array(256).fill(0)); + expect(extracted.slice(320, 384)).toEqual(new Uint8Array(64).fill(0x42)); + }); + 
+ test("extracts sparse file with 1 tar block hole (512 bytes)", async () => { + using dir = tempDir("sparse-1block", {}); + + const tarData = await Bun.file(join(fixturesDir, "one-block-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "one-block-hole.bin")).bytes(); + + // File structure: 100 bytes 'C', 512 bytes hole, 100 bytes 'D' + expect(extracted.length).toBe(712); + expect(extracted.slice(0, 100)).toEqual(new Uint8Array(100).fill(0x43)); + expect(extracted.slice(100, 612)).toEqual(new Uint8Array(512).fill(0)); + expect(extracted.slice(612, 712)).toEqual(new Uint8Array(100).fill(0x44)); + }); + + test("extracts sparse file with multi-block hole (5 tar blocks)", async () => { + using dir = tempDir("sparse-multi", {}); + + const tarData = await Bun.file(join(fixturesDir, "multi-block-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "multi-block-hole.bin")).bytes(); + + // File structure: 128 bytes random, 2560 bytes hole, 128 bytes random + expect(extracted.length).toBe(2816); + // Verify the hole is zeros + expect(extracted.slice(128, 2688)).toEqual(new Uint8Array(2560).fill(0)); + }); + + test("extracts sparse file with leading hole", async () => { + using dir = tempDir("sparse-leading", {}); + + const tarData = await Bun.file(join(fixturesDir, "leading-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "leading-hole.bin")).bytes(); + + // File structure: 2048 bytes hole, 512 bytes 'Y' + expect(extracted.length).toBe(2560); + expect(extracted.slice(0, 2048)).toEqual(new Uint8Array(2048).fill(0)); + expect(extracted.slice(2048, 2560)).toEqual(new Uint8Array(512).fill(0x59)); + }); + + test("extracts sparse file with trailing hole", async () => { + using dir = tempDir("sparse-trailing", {}); + + const tarData = await Bun.file(join(fixturesDir, "trailing-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "trailing-hole.bin")).bytes(); + + // File structure: 256 bytes 'X', 5120 bytes hole + expect(extracted.length).toBe(5376); + expect(extracted.slice(0, 256)).toEqual(new Uint8Array(256).fill(0x58)); + expect(extracted.slice(256, 5376)).toEqual(new Uint8Array(5120).fill(0)); + }); + + test("extracts sparse file with large hole (64KB)", async () => { + using dir = tempDir("sparse-large", {}); + + const tarData = await Bun.file(join(fixturesDir, "large-hole.tar")).bytes(); + const archive = Bun.Archive.from(tarData); + await archive.extract(String(dir)); + + const extracted = await Bun.file(join(String(dir), "large-hole.bin")).bytes(); + + // File structure: 1024 bytes random, 64KB hole, 1024 bytes random + expect(extracted.length).toBe(67584); + // Verify the 64KB hole is zeros + expect(extracted.slice(1024, 66560)).toEqual(new Uint8Array(65536).fill(0)); + }); + }); + + describe("concurrent operations", () => { + test("multiple extract operations run correctly", async () => { + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + using dir1 = tempDir("archive-concurrent-1", {}); + using dir2 = tempDir("archive-concurrent-2", {}); + using dir3 = tempDir("archive-concurrent-3", {}); + + const [count1, count2, count3] = await Promise.all([ + 
archive.extract(String(dir1)), + archive.extract(String(dir2)), + archive.extract(String(dir3)), + ]); + + expect(count1).toBeGreaterThan(0); + expect(count2).toBeGreaterThan(0); + expect(count3).toBeGreaterThan(0); + + expect(await Bun.file(join(String(dir1), "file.txt")).text()).toBe("content"); + expect(await Bun.file(join(String(dir2), "file.txt")).text()).toBe("content"); + expect(await Bun.file(join(String(dir3), "file.txt")).text()).toBe("content"); + }); + + test("multiple blob operations run correctly", async () => { + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + const [blob1, blob2, blob3] = await Promise.all([archive.blob(), archive.blob(), archive.blob()]); + + expect(blob1.size).toBe(blob2.size); + expect(blob2.size).toBe(blob3.size); + }); + + test("mixed operations run correctly", async () => { + const archive = Bun.Archive.from({ + "file.txt": "content", + }); + + using dir = tempDir("archive-concurrent-mixed", {}); + + const [blob, bytes, count] = await Promise.all([archive.blob(), archive.bytes(), archive.extract(String(dir))]); + + expect(blob).toBeInstanceOf(Blob); + expect(bytes).toBeInstanceOf(Uint8Array); + expect(count).toBeGreaterThan(0); + }); + }); +}); diff --git a/test/js/bun/fixtures/sparse-tars/large-hole.tar b/test/js/bun/fixtures/sparse-tars/large-hole.tar new file mode 100644 index 0000000000..49db6fec36 Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/large-hole.tar differ diff --git a/test/js/bun/fixtures/sparse-tars/leading-hole.tar b/test/js/bun/fixtures/sparse-tars/leading-hole.tar new file mode 100644 index 0000000000..084ae30603 Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/leading-hole.tar differ diff --git a/test/js/bun/fixtures/sparse-tars/multi-block-hole.tar b/test/js/bun/fixtures/sparse-tars/multi-block-hole.tar new file mode 100644 index 0000000000..3acf37e8ee Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/multi-block-hole.tar differ diff --git a/test/js/bun/fixtures/sparse-tars/one-block-hole.tar b/test/js/bun/fixtures/sparse-tars/one-block-hole.tar new file mode 100644 index 0000000000..ccf1a36aeb Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/one-block-hole.tar differ diff --git a/test/js/bun/fixtures/sparse-tars/small-hole.tar b/test/js/bun/fixtures/sparse-tars/small-hole.tar new file mode 100644 index 0000000000..bc7c35cdfa Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/small-hole.tar differ diff --git a/test/js/bun/fixtures/sparse-tars/trailing-hole.tar b/test/js/bun/fixtures/sparse-tars/trailing-hole.tar new file mode 100644 index 0000000000..06df81027f Binary files /dev/null and b/test/js/bun/fixtures/sparse-tars/trailing-hole.tar differ
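Note on the sparse-tar fixtures: the sparse-file tests above only state that the binary fixtures were produced with GNU tar's --sparse mode; the generation script itself is not part of the diff. The sketch below is a minimal, hypothetical way to regenerate the small-hole.tar fixture (64 bytes of 'A', a 256-byte hole, 64 bytes of 'B', per the test's expectations), assuming a Linux host with GNU tar on PATH; the Bun Shell usage and temp-directory handling are illustrative assumptions, not taken from this PR.

```ts
// Hypothetical fixture generator -- not part of this PR.
// Assumes GNU tar; very small holes may end up stored as literal zeros on some
// filesystems, since hole detection happens at filesystem block granularity.
import { $ } from "bun";
import { closeSync, mkdtempSync, openSync, writeSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

const work = mkdtempSync(join(tmpdir(), "sparse-fixtures-"));

// Layout from the "small hole" test: 64 bytes 'A', 256-byte hole, 64 bytes 'B' (384 bytes total).
const fd = openSync(join(work, "small-hole.bin"), "w");
writeSync(fd, Buffer.alloc(64, 0x41), 0, 64, 0);   // 'A' block at offset 0
writeSync(fd, Buffer.alloc(64, 0x42), 0, 64, 320); // 'B' block; bytes 64..320 are never written
closeSync(fd);

// --sparse makes GNU tar record the hole in the entry's sparse map instead of storing zeros.
await $`tar --sparse -cf small-hole.tar small-hole.bin`.cwd(work);
console.log("wrote", join(work, "small-hole.tar"));
```

The other fixtures (leading, trailing, one-block, multi-block, and 64 KB holes) would follow the same recipe with the offsets and fill bytes listed in their respective tests.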