From 02ff16d95c487743be39795ecd69171cb08a9879 Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Tue, 20 Aug 2024 13:05:40 -0700 Subject: [PATCH] Support Worker, relative file paths in standalone executables, and partially directories (#13421) --- docs/bundler/executables.md | 78 +++++++++++++++++ packages/bun-types/bun.d.ts | 7 ++ src/StandaloneModuleGraph.zig | 21 +++++ src/bun.js/api/BunObject.zig | 38 +++++++++ src/bun.js/bindings/BunObject+exports.h | 1 + src/bun.js/bindings/BunObject.cpp | 1 + src/bun.js/javascript.zig | 8 +- src/bun.js/web_worker.zig | 54 ++++++++++++ src/cli/build_command.zig | 8 +- src/resolver/resolver.zig | 23 ++++- test/bundler/bundler_compile.test.ts | 107 ++++++++++++++++++++++++ 11 files changed, 337 insertions(+), 9 deletions(-) diff --git a/docs/bundler/executables.md b/docs/bundler/executables.md index 2e9459279d..7a7f05693f 100644 --- a/docs/bundler/executables.md +++ b/docs/bundler/executables.md @@ -106,6 +106,31 @@ The `--minify` argument optimizes the size of the transpiled output code. If you The `--sourcemap` argument embeds a sourcemap compressed with zstd, so that errors & stacktraces point to their original locations instead of the transpiled location. Bun will automatically decompress & resolve the sourcemap when an error occurs. +## Worker + +To use workers in a standalone executable, add the worker's entrypoint to the CLI arguments: + +```sh +$ bun build --compile ./index.ts ./my-worker.ts --outfile myapp +``` + +Then, reference the worker in your code: + +```ts +console.log("Hello from Bun!"); + +// Any of these will work: +new Worker("./my-worker.ts"); +new Worker(new URL("./my-worker.ts", import.meta.url)); +new Worker(new URL("./my-worker.ts", import.meta.url).href); +``` + +As of Bun v1.1.25, when you add multiple entrypoints to a standalone executable, they will be bundled separately into the executable. 
+ +In the future, we may automatically detect usages of statically-known paths in `new Worker(path)` and then bundle those into the executable, but for now, you'll need to add it to the shell command manually like the above example. + +If you use a relative path to a file not included in the standalone executable, it will attempt to load that path from disk relative to the current working directory of the process (and then error if it doesn't exist). + ## SQLite You can use `bun:sqlite` imports with `bun build --compile`. @@ -179,6 +204,59 @@ console.log(addon.hello()); Unfortunately, if you're using `@mapbox/node-pre-gyp` or other similar tools, you'll need to make sure the `.node` file is directly required or it won't bundle correctly. +### Embed directories + +To embed a directory with `bun build --compile`, use a shell glob in your `bun build` command: + +```sh +$ bun build --compile ./index.ts ./public/**/*.png +``` + +Then, you can reference the files in your code: + +```ts +import icon from "./public/assets/icon.png" with { type: "file" }; +import { file } from "bun"; + +export default { + fetch(req) { + // Embedded files can be streamed from Response objects + return new Response(file(icon)); + }, +}; +``` + +This is honestly a workaround, and we expect to improve this in the future with a more direct API. + +### Listing embedded files + +To get a list of all embedded files, use `Bun.embeddedFiles`: + +```js +import "./icon.png" with { type: "file" }; +import { embeddedFiles } from "bun"; + +console.log(embeddedFiles[0].name); // `icon-${hash}.png` +``` + +`Bun.embeddedFiles` returns an array of `Blob` objects which you can use to get the size, contents, and other properties of the files. + +```ts +embeddedFiles: Blob[] +``` + +The list of embedded files excludes bundled source code like `.ts` and `.js` files. + +#### Content hash + +By default, embedded files have a content hash appended to their name. 
This is useful for situations where you want to serve the file from a URL or CDN and have fewer cache invalidation issues. But sometimes, this is unexpected and you might want the original name instead. + +To disable the content hash, pass `--asset-naming` to `bun build --compile` like this: + +```sh +$ bun build --compile --asset-naming="[name].[ext]" ./index.ts +``` + ## Minification To trim down the size of the executable a little, pass `--minify` to `bun build --compile`. This uses Bun's minifier to reduce the code size. Overall though, Bun's binary is still way too big and we need to make it smaller. diff --git a/packages/bun-types/bun.d.ts b/packages/bun-types/bun.d.ts index 3c21ae0983..a164d81a70 100644 --- a/packages/bun-types/bun.d.ts +++ b/packages/bun-types/bun.d.ts @@ -2860,6 +2860,13 @@ declare module "bun" { // tslint:disable-next-line:unified-signatures function file(path: string | URL, options?: BlobPropertyBag): BunFile; + /** + * A list of files embedded into the standalone executable. Lexicographically sorted by name. + * + * If the process is not a standalone executable, this returns an empty array. + */ + const embeddedFiles: ReadonlyArray<Blob>; + /** * `Blob` that leverages the fastest system calls available to operate on files. 
* diff --git a/src/StandaloneModuleGraph.zig b/src/StandaloneModuleGraph.zig index a87827a156..9e9827aa00 100644 --- a/src/StandaloneModuleGraph.zig +++ b/src/StandaloneModuleGraph.zig @@ -35,6 +35,8 @@ pub const StandaloneModuleGraph = struct { pub const base_public_path = targetBasePublicPath(Environment.os, ""); + pub const base_public_path_with_default_suffix = targetBasePublicPath(Environment.os, "root/"); + pub fn targetBasePublicPath(target: Environment.OperatingSystem, comptime suffix: [:0]const u8) [:0]const u8 { return switch (target) { .windows => "B:/~BUN/" ++ suffix, @@ -56,6 +58,11 @@ pub const StandaloneModuleGraph = struct { if (!isBunStandaloneFilePath(base_path)) { return null; } + + return this.findAssumeStandalonePath(name); + } + + pub fn findAssumeStandalonePath(this: *const StandaloneModuleGraph, name: []const u8) ?*File { if (Environment.isWindows) { var normalized_buf: bun.PathBuffer = undefined; const normalized = bun.path.platformToPosixBuf(u8, name, &normalized_buf); @@ -90,6 +97,12 @@ pub const StandaloneModuleGraph = struct { encoding: Encoding = .binary, wtf_string: bun.String = bun.String.empty, + pub fn lessThanByIndex(ctx: []const File, lhs_i: u32, rhs_i: u32) bool { + const lhs = ctx[lhs_i]; + const rhs = ctx[rhs_i]; + return bun.strings.cmpStringsAsc({}, lhs.name, rhs.name); + } + pub fn toWTFString(this: *File) bun.String { if (this.wtf_string.isEmpty()) { switch (this.encoding) { @@ -122,8 +135,16 @@ pub const StandaloneModuleGraph = struct { b.content_type_allocated = false; } + // The real name goes here: store.data.bytes.stored_name = bun.PathString.init(this.name); + // The pretty name goes here: + if (strings.hasPrefixComptime(this.name, base_public_path_with_default_suffix)) { + b.name = bun.String.createUTF8(this.name[base_public_path_with_default_suffix.len..]); + } else if (this.name.len > 0) { + b.name = bun.String.createUTF8(this.name); + } + this.cached_blob = b; } diff --git a/src/bun.js/api/BunObject.zig 
b/src/bun.js/api/BunObject.zig index 895cbcd0a9..0e8b93aeea 100644 --- a/src/bun.js/api/BunObject.zig +++ b/src/bun.js/api/BunObject.zig @@ -72,6 +72,7 @@ pub const BunObject = struct { pub const stdout = toJSGetter(Bun.getStdout); pub const unsafe = toJSGetter(Bun.getUnsafe); pub const semver = toJSGetter(Bun.getSemver); + pub const embeddedFiles = toJSGetter(Bun.getEmbeddedFiles); // --- Getters --- fn getterName(comptime baseName: anytype) [:0]const u8 { @@ -131,6 +132,7 @@ pub const BunObject = struct { @export(BunObject.stdout, .{ .name = getterName("stdout") }); @export(BunObject.unsafe, .{ .name = getterName("unsafe") }); @export(BunObject.semver, .{ .name = getterName("semver") }); + @export(BunObject.embeddedFiles, .{ .name = getterName("embeddedFiles") }); // --- Getters -- // -- Callbacks -- @@ -3788,6 +3790,42 @@ pub fn getGlobConstructor( return JSC.API.Glob.getConstructor(globalThis); } +pub fn getEmbeddedFiles( + globalThis: *JSC.JSGlobalObject, + _: *JSC.JSObject, +) JSC.JSValue { + const vm = globalThis.bunVM(); + const graph = vm.standalone_module_graph orelse return JSC.JSValue.createEmptyArray(globalThis, 0); + + const unsorted_files = graph.files.values(); + var sort_indices = std.ArrayList(u32).initCapacity(bun.default_allocator, unsorted_files.len) catch bun.outOfMemory(); + defer sort_indices.deinit(); + for (0..unsorted_files.len) |index| { + // Some % of people using `bun build --compile` want to obscure the source code + // We don't really do that right now, but exposing the output source + // code here as an easily accessible Blob is even worse for them. + // So let's omit any source code files from the list. 
+ if (unsorted_files[index].loader.isJavaScriptLike()) continue; + sort_indices.appendAssumeCapacity(@intCast(index)); + } + + var i: u32 = 0; + var array = JSC.JSValue.createEmptyArray(globalThis, sort_indices.items.len); + std.mem.sort(u32, sort_indices.items, unsorted_files, bun.StandaloneModuleGraph.File.lessThanByIndex); + for (sort_indices.items) |index| { + const file = &unsorted_files[index]; + // We call .dupe() on this to ensure that we don't return a blob that might get freed later. + const input_blob = file.blob(globalThis); + const blob = JSC.WebCore.Blob.new(input_blob.dupeWithContentType(true)); + blob.allocator = bun.default_allocator; + blob.name = input_blob.name.dupeRef(); + array.putIndex(globalThis, i, blob.toJS(globalThis)); + i += 1; + } + + return array; +} + pub fn getSemver( globalThis: *JSC.JSGlobalObject, _: *JSC.JSObject, diff --git a/src/bun.js/bindings/BunObject+exports.h b/src/bun.js/bindings/BunObject+exports.h index 2925acbfd1..8d6bd26d59 100644 --- a/src/bun.js/bindings/BunObject+exports.h +++ b/src/bun.js/bindings/BunObject+exports.h @@ -30,6 +30,7 @@ macro(stdout) \ macro(unsafe) \ macro(semver) \ + macro(embeddedFiles) \ // --- Callbacks --- #define FOR_EACH_CALLBACK(macro) \ diff --git a/src/bun.js/bindings/BunObject.cpp b/src/bun.js/bindings/BunObject.cpp index e16b635768..30d3f4ba71 100644 --- a/src/bun.js/bindings/BunObject.cpp +++ b/src/bun.js/bindings/BunObject.cpp @@ -553,6 +553,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj SHA512_256 BunObject_getter_wrap_SHA512_256 DontDelete|PropertyCallback TOML BunObject_getter_wrap_TOML DontDelete|PropertyCallback Transpiler BunObject_getter_wrap_Transpiler DontDelete|PropertyCallback + embeddedFiles BunObject_getter_wrap_embeddedFiles DontDelete|PropertyCallback allocUnsafe BunObject_callback_allocUnsafe DontDelete|Function 1 argv BunObject_getter_wrap_argv DontDelete|PropertyCallback build BunObject_callback_build DontDelete|Function 1 diff 
--git a/src/bun.js/javascript.zig b/src/bun.js/javascript.zig index a12e6df9b7..615932bb64 100644 --- a/src/bun.js/javascript.zig +++ b/src/bun.js/javascript.zig @@ -1861,8 +1861,14 @@ pub const VirtualMachine = struct { .handler = ModuleLoader.AsyncModule.Queue.onWakeHandler, .onDependencyError = JSC.ModuleLoader.AsyncModule.Queue.onDependencyError, }; + vm.bundler.resolver.standalone_module_graph = opts.graph; + + if (opts.graph == null) { + vm.bundler.configureLinker(); + } else { + vm.bundler.configureLinkerWithAutoJSX(false); + } - vm.bundler.configureLinker(); try vm.bundler.configureFramework(false); vm.smol = opts.smol; vm.bundler.macro_context = js_ast.Macro.MacroContext.init(&vm.bundler); diff --git a/src/bun.js/web_worker.zig b/src/bun.js/web_worker.zig index 8b1a2e9fc1..dc2cd78df0 100644 --- a/src/bun.js/web_worker.zig +++ b/src/bun.js/web_worker.zig @@ -107,6 +107,60 @@ pub const WebWorker = struct { if (graph.find(str) != null) { break :brk str; } + + // Since `bun build --compile` renames files to `.js` by + // default, we need to do the reverse of our file extension + // mapping. 
+ // + // new Worker("./foo") -> new Worker("./foo.js") + // new Worker("./foo.ts") -> new Worker("./foo.js") + // new Worker("./foo.jsx") -> new Worker("./foo.js") + // new Worker("./foo.mjs") -> new Worker("./foo.js") + // new Worker("./foo.mts") -> new Worker("./foo.js") + // new Worker("./foo.cjs") -> new Worker("./foo.js") + // new Worker("./foo.cts") -> new Worker("./foo.js") + // new Worker("./foo.tsx") -> new Worker("./foo.js") + // + if (bun.strings.hasPrefixComptime(str, "./") or bun.strings.hasPrefixComptime(str, "../")) try_from_extension: { + var pathbuf: bun.PathBuffer = undefined; + var base = str; + + base = bun.path.joinAbsStringBuf(bun.StandaloneModuleGraph.base_public_path_with_default_suffix, &pathbuf, &.{str}, .loose); + const extname = std.fs.path.extension(base); + + // ./foo -> ./foo.js + if (extname.len == 0) { + pathbuf[base.len..][0..3].* = ".js".*; + if (graph.find(pathbuf[0 .. base.len + 3])) |js_file| { + break :brk js_file.name; + } + + break :try_from_extension; + } + + // ./foo.ts -> ./foo.js + if (bun.strings.eqlComptime(extname, ".ts")) { + pathbuf[base.len - 3 .. base.len][0..3].* = ".js".*; + if (graph.find(pathbuf[0..base.len])) |js_file| { + break :brk js_file.name; + } + + break :try_from_extension; + } + + if (extname.len == 4) { + inline for (.{ ".tsx", ".jsx", ".mjs", ".mts", ".cts", ".cjs" }) |ext| { + if (bun.strings.eqlComptime(extname, ext)) { + pathbuf[base.len - ext.len ..][0..".js".len].* = ".js".*; + const as_js = pathbuf[0 .. 
base.len - ext.len + ".js".len]; + if (graph.find(as_js)) |js_file| { + break :brk js_file.name; + } + break :try_from_extension; + } + } + } + } } if (JSC.WebCore.ObjectURLRegistry.isBlobURL(str)) { diff --git a/src/cli/build_command.zig b/src/cli/build_command.zig index f3b51fb118..fe3ad509b1 100644 --- a/src/cli/build_command.zig +++ b/src/cli/build_command.zig @@ -130,12 +130,6 @@ pub const BuildCommand = struct { return; } - if (this_bundler.options.entry_points.len > 1) { - Output.prettyErrorln("error: multiple entry points are not supported with --compile", .{}); - Global.exit(1); - return; - } - if (ctx.bundler_options.outdir.len > 0) { Output.prettyErrorln("error: cannot use --compile with --outdir", .{}); Global.exit(1); @@ -177,7 +171,7 @@ pub const BuildCommand = struct { } } - if (ctx.bundler_options.outdir.len == 0) { + if (ctx.bundler_options.outdir.len == 0 and !ctx.bundler_options.compile) { if (this_bundler.options.entry_points.len > 1) { Output.prettyErrorln("error: Must use --outdir when specifying more than one entry point.", .{}); Global.exit(1); diff --git a/src/resolver/resolver.zig b/src/resolver/resolver.zig index 7c5901a7f5..e87873c37f 100644 --- a/src/resolver/resolver.zig +++ b/src/resolver/resolver.zig @@ -114,6 +114,7 @@ const bufs = struct { pub threadlocal var path_in_global_disk_cache: bun.PathBuffer = undefined; pub threadlocal var abs_to_rel: bun.PathBuffer = undefined; pub threadlocal var node_modules_paths_buf: bun.PathBuffer = undefined; + pub threadlocal var import_path_for_standalone_module_graph: bun.PathBuffer = undefined; pub inline fn bufs(comptime field: std.meta.DeclEnum(@This())) *@TypeOf(@field(@This(), @tagName(field))) { return &@field(@This(), @tagName(field)); @@ -934,11 +935,13 @@ pub const Resolver = struct { // relative to our special /$bunfs/ directory. // // It's always relative to the current working directory of the project root. 
+ // + // ...unless you pass a relative path that exists in the standalone module graph executable. var source_dir_resolver: bun.path.PosixToWinNormalizer = .{}; const source_dir_normalized = brk: { if (r.standalone_module_graph) |graph| { if (bun.StandaloneModuleGraph.isBunStandaloneFilePath(import_path)) { - if (graph.files.contains(import_path)) { + if (graph.findAssumeStandalonePath(import_path) != null) { return .{ .success = Result{ .import_kind = kind, @@ -953,6 +956,24 @@ pub const Resolver = struct { return .{ .not_found = {} }; } else if (bun.StandaloneModuleGraph.isBunStandaloneFilePath(source_dir)) { + if (import_path.len > 2 and isDotSlash(import_path[0..2])) { + const buf = bufs(.import_path_for_standalone_module_graph); + const joined = bun.path.joinAbsStringBuf(source_dir, buf, &.{import_path}, .loose); + + // Support relative paths in the graph + if (graph.findAssumeStandalonePath(joined)) |file| { + return .{ + .success = Result{ + .import_kind = kind, + .path_pair = PathPair{ + .primary = Path.init(file.name), + }, + .is_standalone_module = true, + .module_type = .esm, + }, + }; + } + } break :brk Fs.FileSystem.instance.top_level_dir; } } diff --git a/test/bundler/bundler_compile.test.ts b/test/bundler/bundler_compile.test.ts index e69caaea4a..d80fb4ea77 100644 --- a/test/bundler/bundler_compile.test.ts +++ b/test/bundler/bundler_compile.test.ts @@ -30,6 +30,113 @@ describe("bundler", () => { outfile: "dist/out", run: { stdout: "Hello, world!" 
}, }); + itBundled("compile/WorkerRelativePathNoExtension", { + compile: true, + files: { + "/entry.ts": /* js */ ` + import {rmSync} from 'fs'; + // Verify we're not just importing from the filesystem + rmSync("./worker.ts", {force: true}); + + console.log("Hello, world!"); + new Worker("./worker"); + `, + "/worker.ts": /* js */ ` + console.log("Worker loaded!"); + `.trim(), + }, + entryPointsRaw: ["./entry.ts", "./worker.ts"], + outfile: "dist/out", + run: { stdout: "Hello, world!\nWorker loaded!\n", file: "dist/out", setCwd: true }, + }); + itBundled("compile/WorkerRelativePathTSExtension", { + compile: true, + files: { + "/entry.ts": /* js */ ` + import {rmSync} from 'fs'; + // Verify we're not just importing from the filesystem + rmSync("./worker.ts", {force: true}); + console.log("Hello, world!"); + new Worker("./worker.ts"); + `, + "/worker.ts": /* js */ ` + console.log("Worker loaded!"); + `.trim(), + }, + entryPointsRaw: ["./entry.ts", "./worker.ts"], + outfile: "dist/out", + run: { stdout: "Hello, world!\nWorker loaded!\n", file: "dist/out", setCwd: true }, + }); + itBundled("compile/Bun.embeddedFiles", { + compile: true, + // TODO: this shouldn't be necessary, or we should add a map aliasing files. + assetNaming: "[name].[ext]", + + files: { + "/entry.ts": /* js */ ` + import {rmSync} from 'fs'; + import {createRequire} from 'module'; + import './foo.file'; + import './1.embed'; + import './2.embed'; + rmSync('./foo.file', {force: true}); + rmSync('./1.embed', {force: true}); + rmSync('./2.embed', {force: true}); + const names = { + "1.embed": "1.embed", + "2.embed": "2.embed", + "foo.file": "foo.file", + } + // We want to verify it omits source code. 
+ for (let f of Bun.embeddedFiles) { + const name = f.name; + if (!names[name]) { + throw new Error("Unexpected embedded file: " + name); + } + } + + if (Bun.embeddedFiles.length !== 3) throw "fail"; + if ((await Bun.file(createRequire(import.meta.url).resolve('./1.embed')).text()).trim() !== "abcd") throw "fail"; + if ((await Bun.file(createRequire(import.meta.url).resolve('./2.embed')).text()).trim() !== "abcd") throw "fail"; + if ((await Bun.file(createRequire(import.meta.url).resolve('./foo.file')).text()).trim() !== "abcd") throw "fail"; + if ((await Bun.file(import.meta.require.resolve('./1.embed')).text()).trim() !== "abcd") throw "fail"; + if ((await Bun.file(import.meta.require.resolve('./2.embed')).text()).trim() !== "abcd") throw "fail"; + if ((await Bun.file(import.meta.require.resolve('./foo.file')).text()).trim() !== "abcd") throw "fail"; + console.log("Hello, world!"); + `, + "/1.embed": /* js */ ` + abcd + `.trim(), + "/2.embed": /* js */ ` + abcd + `.trim(), + "/foo.file": /* js */ ` + abcd + `.trim(), + }, + outfile: "dist/out", + run: { stdout: "Hello, world!", setCwd: true }, + }); + itBundled("compile/ResolveEmbeddedFileOutfile", { + compile: true, + // TODO: this shouldn't be necessary, or we should add a map aliasing files. + assetNaming: "[name].[ext]", + + files: { + "/entry.ts": /* js */ ` + import {rmSync} from 'fs'; + import './foo.file'; + rmSync('./foo.file', {force: true}); + if ((await Bun.file(import.meta.require.resolve('./foo.file')).text()).trim() !== "abcd") throw "fail"; + console.log("Hello, world!"); + `, + "/foo.file": /* js */ ` + abcd + `.trim(), + }, + outfile: "dist/out", + run: { stdout: "Hello, world!" }, + }); itBundled("compile/pathToFileURLWorks", { compile: true, files: {