Files
bun.sh/src/bake/DevServer/DirectoryWatchStore.zig
Jarred Sumner 1bfe5c6b37 feat(md): Zig markdown parser with Bun.markdown API (#26440)
## Summary

- Port md4c (CommonMark-compliant markdown parser) from C to Zig under
`src/md/`
- Three output modes:
  - `Bun.markdown.html(input, options?)` — render to HTML string
  - `Bun.markdown.render(input, callbacks?)` — render with custom
    callbacks for each element
  - `Bun.markdown.react(input, options?)` — render to a React Fragment
    element, directly usable as a component return value
- React element creation uses a cached JSC Structure with
`putDirectOffset` for fast allocation
- Component overrides in `react()`: pass tag names as options keys to
replace default HTML elements with custom components
- GFM extensions: tables, strikethrough, task lists, permissive
autolinks, disallowed raw HTML tag filter
- Wire up `.md` as a bundler loader (via explicit `{ type: "md" }`)

## JavaScript API

### `Bun.markdown.html(input, options?)`

Renders markdown to an HTML string:

```js
const html = Bun.markdown.html("# Hello **world**");
// "<h1>Hello <strong>world</strong></h1>\n"

Bun.markdown.html("## Hello", { headingIds: true });
// '<h2 id="hello">Hello</h2>\n'
```

### `Bun.markdown.render(input, callbacks?)`

Renders markdown with custom JavaScript callbacks for each element. Each
callback receives children as a string and optional metadata, and
returns a string:

```js
// Custom HTML with classes
const html = Bun.markdown.render("# Title\n\nHello **world**", {
  heading: (children, { level }) => `<h${level} class="title">${children}</h${level}>`,
  paragraph: (children) => `<p>${children}</p>`,
  strong: (children) => `<b>${children}</b>`,
});

// ANSI terminal output
const ansi = Bun.markdown.render("# Hello\n\n**bold**", {
  heading: (children) => `\x1b[1;4m${children}\x1b[0m\n`,
  paragraph: (children) => children + "\n",
  strong: (children) => `\x1b[1m${children}\x1b[22m`,
});

// Strip all formatting
const text = Bun.markdown.render("# Hello **world**", {
  heading: (children) => children,
  paragraph: (children) => children,
  strong: (children) => children,
});
// "Hello world"

// Return null to omit elements
const result = Bun.markdown.render("# Title\n\n![logo](img.png)\n\nHello", {
  image: () => null,
  heading: (children) => children,
  paragraph: (children) => children + "\n",
});
// "Title\nHello\n"
```

Parser options can be included alongside callbacks:

```js
Bun.markdown.render("Visit www.example.com", {
  link: (children, { href }) => `[${children}](${href})`,
  paragraph: (children) => children,
  permissiveAutolinks: true,
});
```

### `Bun.markdown.react(input, options?)`

Returns a React Fragment element — use it directly as a component return
value:

```tsx
// Use as a component
function Markdown({ text }: { text: string }) {
  return Bun.markdown.react(text);
}

// With custom components
function Heading({ children }: { children: React.ReactNode }) {
  return <h1 className="title">{children}</h1>;
}
const element = Bun.markdown.react("# Hello", { h1: Heading });

// Server-side rendering
import { renderToString } from "react-dom/server";
const html = renderToString(Bun.markdown.react("# Hello **world**"));
// "<h1>Hello <strong>world</strong></h1>"
```

#### React 18 and older

By default, `react()` uses `Symbol.for('react.transitional.element')` as
the `$$typeof` symbol, which is what React 19 expects. For React 18 and
older, pass `reactVersion: 18`:

```tsx
const el = Bun.markdown.react("# Hello", { reactVersion: 18 });
```

### Component Overrides

Tag names can be overridden in `react()`:

```tsx
Bun.markdown.react(input, {
  h1: MyHeading,      // block elements
  p: CustomParagraph,
  a: CustomLink,      // inline elements
  img: CustomImage,
  pre: CodeBlock,
  // ... h1-h6, p, blockquote, ul, ol, li, pre, hr, html,
  //     table, thead, tbody, tr, th, td,
  //     em, strong, a, img, code, del, math, u, br
});
```

Boolean values are ignored (not treated as overrides), so parser options
like `{ strikethrough: true }` don't conflict with component overrides.

### Options

```js
Bun.markdown.html(input, {
  tables: true,              // GFM tables (default: true)
  strikethrough: true,       // ~~deleted~~ (default: true)
  tasklists: true,           // - [x] items (default: true)
  headingIds: true,          // Generate id attributes on headings
  autolinkHeadings: true,    // Wrap heading content in <a> tags
  tagFilter: false,          // GFM disallowed HTML tags
  wikiLinks: false,          // [[wiki]] links
  latexMath: false,          // $inline$ and $$display$$
  underline: false,          // __underline__ (instead of <strong>)
  // ... and more
});
```

## Architecture

### Parser (`src/md/`)

The parser is split into focused modules using Zig's delegation pattern:

| Module | Purpose |
|--------|---------|
| `parser.zig` | Core `Parser` struct, state, and re-exported method
delegation |
| `blocks.zig` | Block-level parsing: document processing, line
analysis, block start/end |
| `containers.zig` | Container management: blockquotes, lists, list
items |
| `inlines.zig` | Inline parsing: emphasis, code spans, HTML tags,
entities |
| `links.zig` | Link/image resolution, reference links, autolink
rendering |
| `autolinks.zig` | Permissive autolink detection (www, url, email) |
| `line_analysis.zig` | Line classification: headings, fences, HTML
blocks, tables |
| `ref_defs.zig` | Reference definition parsing and lookup |
| `render_blocks.zig` | Block rendering dispatch (code, HTML, table
blocks) |
| `html_renderer.zig` | HTML renderer implementing `Renderer` VTable |
| `types.zig` | Shared types: `Renderer` VTable, `BlockType`,
`SpanType`, `TextType`, etc. |

### Renderer Abstraction

Parsing is decoupled from output via a `Renderer` VTable interface:

```zig
pub const Renderer = struct {
    ptr: *anyopaque,
    vtable: *const VTable,

    pub const VTable = struct {
        enterBlock: *const fn (...) void,
        leaveBlock: *const fn (...) void,
        enterSpan:  *const fn (...) void,
        leaveSpan:  *const fn (...) void,
        text:       *const fn (...) void,
    };
};
```

Four renderers are implemented:
- **`HtmlRenderer`** (`src/md/html_renderer.zig`) — produces HTML string
output
- **`JsCallbackRenderer`** (`src/bun.js/api/MarkdownObject.zig`) — calls
JS callbacks for each element, accumulates string output
- **`ParseRenderer`** (`src/bun.js/api/MarkdownObject.zig`) — builds
React element AST with `MarkedArgumentBuffer` for GC safety
- **`JSReactElement`** (`src/bun.js/bindings/JSReactElement.cpp`) — C++
fast path for React element creation using cached JSC Structure +
`putDirectOffset`

## Test plan

- [x] 792 spec tests pass (CommonMark, GFM tables, strikethrough,
tasklists, permissive autolinks, GFM tag filter, wiki links, coverage,
regressions)
- [x] 114 API tests pass (`html()`, `render()`, `react()`,
`renderToString` integration, component overrides)
- [x] 58 GFM compatibility tests pass

```
bun bd test test/js/bun/md/md-spec.test.ts       # 792 pass
bun bd test test/js/bun/md/md-render-api.test.ts  # 114 pass
bun bd test test/js/bun/md/gfm-compat.test.ts     # 58 pass
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
Co-authored-by: SUZUKI Sosuke <sosuke@bun.com>
Co-authored-by: robobun <robobun@oven.sh>
Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Kirill Markelov <kerusha.chubko@gmail.com>
Co-authored-by: Ciro Spaciari <ciro.spaciari@gmail.com>
Co-authored-by: Alistair Smith <hi@alistair.sh>
2026-01-28 20:24:02 -08:00

272 lines
9.6 KiB
Zig

const DirectoryWatchStore = @This();
/// When a file fails to import a relative path, directory watchers are added so
/// that when a matching file is created, the dependencies can be rebuilt. This
/// handles HMR cases where a user writes an import before creating the file,
/// or moves files around. This structure is not thread-safe.
///
/// This structure manages those watchers, including releasing them once
/// import resolution failures are solved.
// TODO: when a file fixes its resolution, there is no code specifically to remove the watchers.
/// List of active watchers. Can be re-ordered on removal
watches: bun.StringArrayHashMapUnmanaged(Entry),
/// Storage for every `Dep`, addressed by `Dep.Index`. Each `Entry` chains its
/// dependencies through `Entry.first_dep` and `Dep.next`.
dependencies: ArrayListUnmanaged(Dep),
/// Dependencies cannot be re-ordered. This list tracks what indexes are free.
dependencies_free_list: ArrayListUnmanaged(Dep.Index),
/// A store with no watches, no dependencies and an empty free list.
pub const empty: DirectoryWatchStore = .{
.watches = .{},
.dependencies = .{},
.dependencies_free_list = .{},
};
/// Recovers the `DevServer` that embeds this store as its
/// `directory_watchers` field.
pub fn owner(store: *DirectoryWatchStore) *DevServer {
    const dev: *DevServer = @alignCast(@fieldParentPtr("directory_watchers", store));
    return dev;
}
/// Records that `import_source` failed to resolve `specifier`, and watches the
/// directory the specifier would live in so the import can be retried later.
///
/// Does nothing when the specifier is empty, when `import_source` is not an
/// absolute path, or when a JS/TS import is not `./` or `../` relative.
/// Watch setup failures reported as `error.Ignore` are dropped; only
/// `error.OutOfMemory` is returned to the caller.
pub fn trackResolutionFailure(store: *DirectoryWatchStore, import_source: []const u8, specifier: []const u8, renderer: bake.Graph, loader: bun.options.Loader) bun.OOM!void {
    // When it does not resolve to a file path, there is nothing to track.
    if (specifier.len == 0 or !std.fs.path.isAbsolute(import_source)) return;

    switch (loader) {
        .tsx, .ts, .jsx, .js => {
            const is_relative = bun.strings.startsWith(specifier, "./") or
                bun.strings.startsWith(specifier, "../");
            if (!is_relative) return;
        },

        // Imports in CSS can resolve to relative files without './'
        // Imports in HTML can resolve to project-relative paths by
        // prefixing with '/', but that is done in HTMLScanner.
        .css, .html => {},

        // Multiple parts of DevServer rely on the fact that these
        // loaders do not depend on importing other files.
        .file,
        .json,
        .jsonc,
        .toml,
        .yaml,
        .json5,
        .wasm,
        .napi,
        .base64,
        .dataurl,
        .text,
        .bunsh,
        .sqlite,
        .sqlite_embedded,
        .md,
        => bun.debugAssert(false),
    }

    // Join the specifier onto the importing file's directory, then take the
    // directory of the result: that is the directory to watch.
    const path_buf = bun.path_buffer_pool.get();
    defer bun.path_buffer_pool.put(path_buf);
    const source_dir = bun.path.dirname(import_source, .auto);
    const resolved = bun.path.joinAbsStringBuf(source_dir, path_buf, &.{specifier}, .auto);
    const watch_dir = bun.path.dirname(resolved, .auto);

    const dev = store.owner();
    dev.graph_safety_lock.lock();
    defer dev.graph_safety_lock.unlock();

    // The `import_source` parameter is not a stable string. Since the
    // import source will be added to IncrementalGraph anyways, the key
    // stored by the graph is reused as the long-lived copy of the path.
    const stable_source_path = switch (renderer) {
        .client => (try dev.client_graph.insertEmpty(import_source, .unknown)).key,
        .server, .ssr => (try dev.server_graph.insertEmpty(import_source, .unknown)).key,
    };

    store.insert(watch_dir, stable_source_path, specifier) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        error.Ignore => {}, // ignoring watch errors.
    };
}
/// Registers a dependency of `file_path` on `specifier` appearing inside
/// `dir_name_to_watch`, creating the directory watch if this is the first
/// dependency on that directory.
///
/// `dir_name_to_watch` is cloned
/// `file_path` must have lifetime that outlives the watch
/// `specifier` is cloned (prefixed with "./" when it neither starts with '.'
/// nor is an absolute path)
///
/// Errors:
/// - `error.OutOfMemory` when an allocation fails.
/// - `error.Ignore` when the directory cannot be watched (path too long,
///   directory missing or not a directory, or the watcher rejects it).
/// On any error no watch entry and no dependency are left behind.
fn insert(
    store: *DirectoryWatchStore,
    dir_name_to_watch: []const u8,
    file_path: []const u8,
    specifier: []const u8,
) !void {
    assert(specifier.len > 0);
    // TODO: watch the parent dir too.
    const dev = store.owner();

    debug.log("DirectoryWatchStore.insert({f}, {f}, {f})", .{
        bun.fmt.quote(dir_name_to_watch),
        bun.fmt.quote(file_path),
        bun.fmt.quote(specifier),
    });

    // Reserve the dependency slot up front so appendDepAssumeCapacity below
    // cannot fail. A free-list slot, when available, needs no new capacity.
    if (store.dependencies_free_list.items.len == 0)
        try store.dependencies.ensureUnusedCapacity(dev.allocator(), 1);

    // Clone the specifier BEFORE touching the map. If this allocation failed
    // after `getOrPut` had created a new entry, that entry would be left in
    // the map with an undefined value and a key borrowed from the caller.
    const specifier_cloned = if (specifier[0] == '.' or std.fs.path.isAbsolute(specifier))
        try dev.allocator().dupe(u8, specifier)
    else
        try std.fmt.allocPrint(dev.allocator(), "./{s}", .{specifier});
    errdefer dev.allocator().free(specifier_cloned);

    const gop = try store.watches.getOrPut(dev.allocator(), bun.strings.withoutTrailingSlashWindowsPath(dir_name_to_watch));
    if (gop.found_existing) {
        // Directory is already watched: push the new dependency onto the
        // front of its linked list.
        const dep = store.appendDepAssumeCapacity(.{
            .next = gop.value_ptr.first_dep.toOptional(),
            .source_file_path = file_path,
            .specifier = specifier_cloned,
        });
        gop.value_ptr.first_dep = dep;
        return;
    }
    // From here on the map holds a not-yet-initialized entry; drop it on error.
    errdefer store.watches.swapRemoveAt(gop.index);

    // Try to use an existing open directory handle
    const cache_fd = if (dev.server_transpiler.resolver.readDirInfo(dir_name_to_watch) catch null) |cache|
        cache.getFileDescriptor().unwrapValid()
    else
        null;

    const fd, const owned_fd = if (Watcher.requires_file_descriptors) if (cache_fd) |fd|
        .{ fd, false }
    else switch (bun.sys.open(
        &(std.posix.toPosixPath(dir_name_to_watch) catch |err| switch (err) {
            error.NameTooLong => return error.Ignore, // wouldn't be able to open, ignore
        }),
        // O_EVTONLY is the flag to indicate that only watches will be used.
        bun.O.DIRECTORY | bun.c.O_EVTONLY,
        0,
    )) {
        .result => |fd| .{ fd, true },
        .err => |err| switch (err.getErrno()) {
            // If this directory doesn't exist, a watcher should be placed
            // on the parent directory. Then, if this directory is later
            // created, the watcher can be properly initialized. This would
            // happen if a specifier like `./dir/whatever/hello.tsx` and
            // `dir` does not exist, Bun must place a watcher on `.`, see
            // the creation of `dir`, and repeat until it can open a watcher
            // on `whatever` to see the creation of `hello.tsx`
            .NOENT => {
                // TODO: implement that. for now it ignores (BUN-10968)
                return error.Ignore;
            },
            .NOTDIR => return error.Ignore, // ignore
            else => {
                bun.todoPanic(@src(), "log watcher error", .{});
            },
        },
    } else .{ bun.invalid_fd, false };
    errdefer if (Watcher.requires_file_descriptors) if (owned_fd) fd.close();

    if (Watcher.requires_file_descriptors)
        debug.log("-> fd: {f} ({s})", .{
            fd,
            // `owned_fd` is true only when the handle was opened just above.
            if (owned_fd) "owned fd" else "from dir cache",
        });

    const dir_name = try dev.allocator().dupe(u8, dir_name_to_watch);
    errdefer dev.allocator().free(dir_name);

    // Replace the borrowed key with a slice of the owned copy.
    // NOTE(review): if the helper trims a trailing separator, this key is
    // shorter than the `dir_name` allocation, yet `freeEntry` frees the key
    // slice — confirm the allocator tolerates that.
    gop.key_ptr.* = bun.strings.withoutTrailingSlashWindowsPath(dir_name);

    const watch_index = switch (dev.bun_watcher.addDirectory(fd, dir_name, bun.Watcher.getHash(dir_name), false)) {
        .err => return error.Ignore,
        .result => |id| id,
    };

    // Nothing below can fail, so the errdefers above will not undo this.
    const dep = store.appendDepAssumeCapacity(.{
        .next = .none,
        .source_file_path = file_path,
        .specifier = specifier_cloned,
    });

    // Write through the slot obtained from `getOrPut` rather than looking the
    // entry up again by `dir_name`, which may differ from the stored key.
    gop.value_ptr.* = .{
        .dir = fd,
        .dir_fd_owned = owned_fd,
        .first_dep = dep,
        .watch_index = watch_index,
    };
}
/// Releases the dependency stored at `index`: frees its cloned specifier and
/// makes the slot reusable. The last slot of the list is dropped outright;
/// any other slot is recorded in the free list, which may allocate.
///
/// Caller must detach the dependency from the linked list it is in.
pub fn freeDependencyIndex(store: *DirectoryWatchStore, alloc: Allocator, index: Dep.Index) !void {
    const slot = index.get();
    const deps = &store.dependencies;

    alloc.free(deps.items[slot].specifier);
    if (Environment.isDebug) deps.items[slot] = undefined;

    const is_last = slot == (deps.items.len - 1);
    if (!is_last) {
        // Dependencies cannot be re-ordered, so remember the hole instead.
        try store.dependencies_free_list.append(alloc, index);
        return;
    }
    deps.items.len -= 1;
}
/// Removes the watch at `entry_index`: unregisters it from the watcher, frees
/// the owned directory-name key, removes the map entry (swap-remove, so the
/// last entry takes this index), and closes the directory handle if this
/// store opened it.
///
/// Expects dependency list to be already freed
pub fn freeEntry(store: *DirectoryWatchStore, alloc: Allocator, entry_index: usize) void {
    // Copy the entry out: the map slot is invalidated by the removal below.
    const entry = store.watches.values()[entry_index];

    debug.log("DirectoryWatchStore.freeEntry({d}, {f})", .{
        entry_index,
        entry.dir,
    });

    store.owner().bun_watcher.removeAtIndex(entry.watch_index, 0, &.{}, .file);

    defer if (entry.dir_fd_owned) entry.dir.close();

    alloc.free(store.watches.keys()[entry_index]);
    store.watches.swapRemoveAt(entry_index);

    if (store.watches.entries.len == 0) {
        // With no watches left, every dependency has been freed by the caller
        // (see the contract above), so all remaining slots are holes.
        // `freeDependencyIndex` only shrinks `dependencies` when the freed
        // slot is the last one, so the list length is not necessarily zero
        // here (e.g. freeing slot 0 and then slot 1 leaves a length of 1).
        // Reset both lists together instead of asserting on the length.
        store.dependencies.clearRetainingCapacity();
        store.dependencies_free_list.clearRetainingCapacity();
    }
}
/// Stores `dep` and returns its index. A previously freed slot is reused when
/// the free list has one; otherwise the dependency is appended, which
/// requires the caller to have reserved capacity beforehand.
fn appendDepAssumeCapacity(store: *DirectoryWatchStore, dep: Dep) Dep.Index {
    const recycled = store.dependencies_free_list.pop() orelse {
        const fresh = Dep.Index.init(@intCast(store.dependencies.items.len));
        store.dependencies.appendAssumeCapacity(dep);
        return fresh;
    };
    store.dependencies.items[recycled.get()] = dep;
    return recycled;
}
/// One watched directory, stored as a value in `watches` keyed by the
/// directory path.
pub const Entry = struct {
/// The directory handle the watch is placed on
dir: bun.FileDescriptor,
/// True when `insert` opened `dir` itself; `freeEntry` closes the handle
/// only in that case.
dir_fd_owned: bool,
/// Head of the linked list of dependencies waiting on this directory
/// (continued through `Dep.next`).
first_dep: Dep.Index,
/// Index returned by the watcher's `addDirectory`; passed to the watcher's
/// `removeAtIndex` when the entry is freed.
watch_index: u16,
};
/// One failed import that is waiting on a watched directory.
pub const Dep = struct {
/// Next dependency on the same directory, or `.none` at the end of the list.
next: Index.Optional,
/// Path of the file containing the failed import. Not owned by the store:
/// `insert` requires it to outlive the watch.
source_file_path: []const u8,
/// The specifier that failed. Before running re-build, it is resolved for, as
/// creating an unrelated file should not re-emit another error. Allocated memory
/// (cloned by `insert`, freed by `freeDependencyIndex`).
specifier: []u8,
/// Typed index into `DirectoryWatchStore.dependencies`.
pub const Index = bun.GenericIndex(u32, Dep);
};
const bun = @import("bun");
const Environment = bun.Environment;
const Watcher = bun.Watcher;
const assert = bun.assert;
const bake = bun.bake;
const DevServer = bake.DevServer;
const debug = DevServer.debug;
const std = @import("std");
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const Allocator = std.mem.Allocator;