Files
bun.sh/test/js/bun/md/md-render-callback.test.ts
Jarred Sumner 1bfe5c6b37 feat(md): Zig markdown parser with Bun.markdown API (#26440)
## Summary

- Port md4c (CommonMark-compliant markdown parser) from C to Zig under
`src/md/`
- Three output modes:
  - `Bun.markdown.html(input, options?)` — render to HTML string
  - `Bun.markdown.render(input, callbacks?)` — render with custom
    callbacks for each element
  - `Bun.markdown.react(input, options?)` — render to a React Fragment
    element, directly usable as a component return value
- React element creation uses a cached JSC Structure with
`putDirectOffset` for fast allocation
- Component overrides in `react()`: pass tag names as options keys to
replace default HTML elements with custom components
- GFM extensions: tables, strikethrough, task lists, permissive
autolinks, disallowed raw HTML tag filter
- Wire up `.md` as a bundler loader (via explicit `{ type: "md" }`)

## JavaScript API

### `Bun.markdown.html(input, options?)`

Renders markdown to an HTML string:

```js
const html = Bun.markdown.html("# Hello **world**");
// "<h1>Hello <strong>world</strong></h1>\n"

Bun.markdown.html("## Hello", { headingIds: true });
// '<h2 id="hello">Hello</h2>\n'
```

### `Bun.markdown.render(input, callbacks?)`

Renders markdown with custom JavaScript callbacks for each element. Each
callback receives children as a string and optional metadata, and
returns a string:

```js
// Custom HTML with classes
const html = Bun.markdown.render("# Title\n\nHello **world**", {
  heading: (children, { level }) => `<h${level} class="title">${children}</h${level}>`,
  paragraph: (children) => `<p>${children}</p>`,
  strong: (children) => `<b>${children}</b>`,
});

// ANSI terminal output
const ansi = Bun.markdown.render("# Hello\n\n**bold**", {
  heading: (children) => `\x1b[1;4m${children}\x1b[0m\n`,
  paragraph: (children) => children + "\n",
  strong: (children) => `\x1b[1m${children}\x1b[22m`,
});

// Strip all formatting
const text = Bun.markdown.render("# Hello **world**", {
  heading: (children) => children,
  paragraph: (children) => children,
  strong: (children) => children,
});
// "Hello world"

// Return null to omit elements
const result = Bun.markdown.render("# Title\n\n![logo](img.png)\n\nHello", {
  image: () => null,
  heading: (children) => children,
  paragraph: (children) => children + "\n",
});
// "Title\nHello\n"
```

Parser options can be included alongside callbacks:

```js
Bun.markdown.render("Visit www.example.com", {
  link: (children, { href }) => `[${children}](${href})`,
  paragraph: (children) => children,
  permissiveAutolinks: true,
});
```

### `Bun.markdown.react(input, options?)`

Returns a React Fragment element — use it directly as a component return
value:

```tsx
// Use as a component
function Markdown({ text }: { text: string }) {
  return Bun.markdown.react(text);
}

// With custom components
function Heading({ children }: { children: React.ReactNode }) {
  return <h1 className="title">{children}</h1>;
}
const element = Bun.markdown.react("# Hello", { h1: Heading });

// Server-side rendering
import { renderToString } from "react-dom/server";
const html = renderToString(Bun.markdown.react("# Hello **world**"));
// "<h1>Hello <strong>world</strong></h1>"
```

#### React 18 and older

By default, `react()` uses `Symbol.for('react.transitional.element')` as
the `$$typeof` symbol, which is what React 19 expects. For React 18 and
older, pass `reactVersion: 18`:

```tsx
const el = Bun.markdown.react("# Hello", { reactVersion: 18 });
```

### Component Overrides

Tag names can be overridden in `react()`:

```tsx
Bun.markdown.react(input, {
  h1: MyHeading,      // block elements
  p: CustomParagraph,
  a: CustomLink,      // inline elements
  img: CustomImage,
  pre: CodeBlock,
  // ... h1-h6, p, blockquote, ul, ol, li, pre, hr, html,
  //     table, thead, tbody, tr, th, td,
  //     em, strong, a, img, code, del, math, u, br
});
```

Boolean values are ignored (not treated as overrides), so parser options
like `{ strikethrough: true }` don't conflict with component overrides.

### Options

```js
Bun.markdown.html(input, {
  tables: true,              // GFM tables (default: true)
  strikethrough: true,       // ~~deleted~~ (default: true)
  tasklists: true,           // - [x] items (default: true)
  headingIds: true,          // Generate id attributes on headings
  autolinkHeadings: true,    // Wrap heading content in <a> tags
  tagFilter: false,          // GFM disallowed HTML tags
  wikiLinks: false,          // [[wiki]] links
  latexMath: false,          // $inline$ and $$display$$
  underline: false,          // __underline__ (instead of <strong>)
  // ... and more
});
```

## Architecture

### Parser (`src/md/`)

The parser is split into focused modules using Zig's delegation pattern:

| Module | Purpose |
|--------|---------|
| `parser.zig` | Core `Parser` struct, state, and re-exported method delegation |
| `blocks.zig` | Block-level parsing: document processing, line analysis, block start/end |
| `containers.zig` | Container management: blockquotes, lists, list items |
| `inlines.zig` | Inline parsing: emphasis, code spans, HTML tags, entities |
| `links.zig` | Link/image resolution, reference links, autolink rendering |
| `autolinks.zig` | Permissive autolink detection (www, url, email) |
| `line_analysis.zig` | Line classification: headings, fences, HTML blocks, tables |
| `ref_defs.zig` | Reference definition parsing and lookup |
| `render_blocks.zig` | Block rendering dispatch (code, HTML, table blocks) |
| `html_renderer.zig` | HTML renderer implementing `Renderer` VTable |
| `types.zig` | Shared types: `Renderer` VTable, `BlockType`, `SpanType`, `TextType`, etc. |

### Renderer Abstraction

Parsing is decoupled from output via a `Renderer` VTable interface:

```zig
pub const Renderer = struct {
    ptr: *anyopaque,
    vtable: *const VTable,

    pub const VTable = struct {
        enterBlock: *const fn (...) void,
        leaveBlock: *const fn (...) void,
        enterSpan:  *const fn (...) void,
        leaveSpan:  *const fn (...) void,
        text:       *const fn (...) void,
    };
};
```

Four renderers are implemented:
- **`HtmlRenderer`** (`src/md/html_renderer.zig`) — produces HTML string
output
- **`JsCallbackRenderer`** (`src/bun.js/api/MarkdownObject.zig`) — calls
JS callbacks for each element, accumulates string output
- **`ParseRenderer`** (`src/bun.js/api/MarkdownObject.zig`) — builds
React element AST with `MarkedArgumentBuffer` for GC safety
- **`JSReactElement`** (`src/bun.js/bindings/JSReactElement.cpp`) — C++
fast path for React element creation using cached JSC Structure +
`putDirectOffset`

## Test plan

- [x] 792 spec tests pass (CommonMark, GFM tables, strikethrough,
tasklists, permissive autolinks, GFM tag filter, wiki links, coverage,
regressions)
- [x] 114 API tests pass (`html()`, `render()`, `react()`,
`renderToString` integration, component overrides)
- [x] 58 GFM compatibility tests pass

```
bun bd test test/js/bun/md/md-spec.test.ts       # 792 pass
bun bd test test/js/bun/md/md-render-api.test.ts  # 114 pass
bun bd test test/js/bun/md/gfm-compat.test.ts     # 58 pass
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
Co-authored-by: SUZUKI Sosuke <sosuke@bun.com>
Co-authored-by: robobun <robobun@oven.sh>
Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Kirill Markelov <kerusha.chubko@gmail.com>
Co-authored-by: Ciro Spaciari <ciro.spaciari@gmail.com>
Co-authored-by: Alistair Smith <hi@alistair.sh>
2026-01-28 20:24:02 -08:00

259 lines
9.5 KiB
TypeScript

import { describe, expect, test } from "bun:test";
const Markdown = Bun.markdown;
// ============================================================================
// Bun.markdown.render() — callback-based string renderer
// ============================================================================
describe("Bun.markdown.render", () => {
test("returns a string", () => {
  // Only the type of the return value is checked here, not its content.
  const callbacks = {
    heading: (children: string) => `<h1>${children}</h1>`,
  };
  const rendered = Markdown.render("# Hello\n", callbacks);
  expect(typeof rendered).toBe("string");
});
test("without callbacks, children pass through unchanged", () => {
  // No callbacks argument is supplied; the expected output is the bare text
  // without the trailing newline of the input.
  const source = "Hello world\n";
  expect(Markdown.render(source)).toBe("Hello world");
});
test("heading callback with level metadata", () => {
  // The heading callback reads `level` from its second argument to pick the tag.
  const wrapInHeadingTag = (children: string, { level }: any) => `<h${level}>${children}</h${level}>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("# Hello\n", {
    heading: wrapInHeadingTag,
    paragraph: passThrough,
  });

  expect(html).toBe("<h1>Hello</h1>");
});
test("heading levels 1-6", () => {
  // Renders one heading for each marker count from 1 through 6 and checks
  // that the `level` metadata passed to the callback matches that count.
  for (let depth = 1; depth <= 6; depth++) {
    // `String.prototype.repeat` builds the run of `#` markers directly,
    // instead of allocating a Buffer and decoding it back to a string.
    const md = `${"#".repeat(depth)} Level\n`;
    const result = Markdown.render(md, {
      heading: (children: string, { level }: any) => `[h${level}:${children}]`,
    });
    expect(result).toBe(`[h${depth}:Level]`);
  }
});
test("paragraph callback", () => {
  // A lone paragraph callback wraps the paragraph's text in <p> tags.
  const asParagraph = (children: string) => `<p>${children}</p>`;
  const html = Markdown.render("Hello world\n", { paragraph: asParagraph });
  expect(html).toBe("<p>Hello world</p>");
});
test("strong callback", () => {
  // The paragraph callback returns its children untouched so that only the
  // strong callback's markup shows up in the expected output.
  const bold = (children: string) => `<b>${children}</b>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("**bold**\n", {
    strong: bold,
    paragraph: passThrough,
  });

  expect(html).toBe("<b>bold</b>");
});
test("emphasis callback", () => {
  // The paragraph callback returns its children untouched so that only the
  // emphasis callback's markup shows up in the expected output.
  const italic = (children: string) => `<i>${children}</i>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("*italic*\n", {
    emphasis: italic,
    paragraph: passThrough,
  });

  expect(html).toBe("<i>italic</i>");
});
test("link callback with href metadata", () => {
  // The link callback builds an anchor from the `href` in its metadata argument.
  const anchor = (children: string, { href }: any) => `<a href="${href}">${children}</a>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("[click](https://example.com)\n", {
    link: anchor,
    paragraph: passThrough,
  });

  expect(html).toBe('<a href="https://example.com">click</a>');
});
test("link callback with title metadata", () => {
  // The link's quoted title is expected to arrive as `title` next to `href`.
  const source = '[click](https://example.com "My Title")\n';
  const anchorWithTitle = (children: string, { href, title }: any) =>
    `<a href="${href}" title="${title}">${children}</a>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render(source, {
    link: anchorWithTitle,
    paragraph: passThrough,
  });

  expect(html).toBe('<a href="https://example.com" title="My Title">click</a>');
});
test("image callback with src metadata", () => {
  // The image callback gets the alt text as `children` and the URL as `src`.
  const imgTag = (children: string, { src }: any) => `<img src="${src}" alt="${children}" />`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("![alt text](image.png)\n", {
    image: imgTag,
    paragraph: passThrough,
  });

  expect(html).toBe('<img src="image.png" alt="alt text" />');
});
test("code block callback with language metadata", () => {
  // A fenced block tagged `js` should expose that tag as `meta.language`;
  // the code body is expected to keep its trailing newline.
  const fenced = "```js\nconsole.log('hi');\n```\n";
  const preWithLang = (children: string, meta: any) => `<pre lang="${meta?.language}">${children}</pre>`;

  const html = Markdown.render(fenced, { code: preWithLang });

  expect(html).toBe("<pre lang=\"js\">console.log('hi');\n</pre>");
});
test("code block without language", () => {
  // With no info string on the fence, `meta?.language` is nullish, so the
  // callback's "none" fallback is what ends up in the output.
  const fenced = "```\nplain code\n```\n";
  const preWithLang = (children: string, meta: any) =>
    `<pre lang="${meta?.language ?? "none"}">${children}</pre>`;

  const html = Markdown.render(fenced, { code: preWithLang });

  expect(html).toBe('<pre lang="none">plain code\n</pre>');
});
test("codespan callback", () => {
  // Inline code goes through `codespan`; the paragraph returns it untouched.
  const inlineCode = (children: string) => `<code>${children}</code>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("`code`\n", {
    codespan: inlineCode,
    paragraph: passThrough,
  });

  expect(html).toBe("<code>code</code>");
});
test("hr callback", () => {
  // The hr callback takes no arguments and returns a fixed string.
  const rule = () => "<hr />";
  expect(Markdown.render("---\n", { hr: rule })).toBe("<hr />");
});
test("blockquote callback", () => {
  // The paragraph inside the quote is rendered first, and its output becomes
  // the blockquote callback's `children`.
  const quote = (children: string) => `<blockquote>${children}</blockquote>`;
  const asParagraph = (children: string) => `<p>${children}</p>`;

  const html = Markdown.render("> quoted text\n", {
    blockquote: quote,
    paragraph: asParagraph,
  });

  expect(html).toBe("<blockquote><p>quoted text</p></blockquote>");
});
test("list callbacks (ordered)", () => {
  // The list callback branches on `ordered`; for this input the ordered
  // branch is expected, with `start` equal to 1.
  const renderList = (children: string, { ordered, start }: any) => {
    if (ordered) {
      return `<ol start="${start}">${children}</ol>`;
    }
    return `<ul>${children}</ul>`;
  };
  const renderItem = (children: string) => `<li>${children}</li>`;

  const html = Markdown.render("1. first\n2. second\n", {
    list: renderList,
    listItem: renderItem,
  });

  expect(html).toBe('<ol start="1"><li>first</li><li>second</li></ol>');
});
test("list callbacks (unordered)", () => {
  // The list callback branches on `ordered`; a dash-marked list is expected
  // to take the <ul> branch.
  const renderList = (children: string, { ordered }: any) => {
    if (ordered) {
      return `<ol>${children}</ol>`;
    }
    return `<ul>${children}</ul>`;
  };
  const renderItem = (children: string) => `<li>${children}</li>`;

  const html = Markdown.render("- a\n- b\n", {
    list: renderList,
    listItem: renderItem,
  });

  expect(html).toBe("<ul><li>a</li><li>b</li></ul>");
});
test("ordered list with start number", () => {
  // The list begins at "3.", so `start` in the list metadata should be 3.
  const orderedList = (children: string, { start }: any) => `<ol start="${start}">${children}</ol>`;
  const renderItem = (children: string) => `<li>${children}</li>`;

  const html = Markdown.render("3. first\n4. second\n", {
    list: orderedList,
    listItem: renderItem,
  });

  expect(html).toBe('<ol start="3"><li>first</li><li>second</li></ol>');
});
test("strikethrough callback", () => {
  // `~~text~~` goes through the strikethrough callback; the paragraph
  // returns it untouched.
  const struck = (children: string) => `<del>${children}</del>`;
  const passThrough = (children: string) => children;

  const html = Markdown.render("~~deleted~~\n", {
    strikethrough: struck,
    paragraph: passThrough,
  });

  expect(html).toBe("<del>deleted</del>");
});
test("text callback", () => {
  // The text callback upper-cases plain text before the paragraph sees it.
  const shout = (text: string) => text.toUpperCase();
  const passThrough = (children: string) => children;

  const output = Markdown.render("Hello world\n", {
    text: shout,
    paragraph: passThrough,
  });

  expect(output).toBe("HELLO WORLD");
});
test("returning null omits element", () => {
  // The image callback returns null; the expected output contains nothing
  // for the image beyond the "\n" appended by its enclosing paragraph.
  const source = "# Title\n\n![logo](img.png)\n\nHello\n";
  const dropImage = () => null;
  const passThrough = (children: string) => children;
  const withNewline = (children: string) => children + "\n";

  const output = Markdown.render(source, {
    image: dropImage,
    heading: passThrough,
    paragraph: withNewline,
  });

  expect(output).toBe("Title\nHello\n");
});
test("returning undefined omits element", () => {
  // The heading callback returns undefined; only the paragraph text is
  // expected in the output.
  const dropHeading = () => undefined;
  const passThrough = (children: string) => children;

  const output = Markdown.render("# Title\n\nHello\n", {
    heading: dropHeading,
    paragraph: passThrough,
  });

  expect(output).toBe("Hello");
});
test("multiple callbacks combined", () => {
  // Heading, paragraph and strong callbacks are all supplied together; the
  // expected output is their results joined with no separator.
  const callbacks = {
    heading: (children: string, { level }: any) => `<h${level} class="heading">${children}</h${level}>`,
    paragraph: (children: string) => `<p class="body">${children}</p>`,
    strong: (children: string) => `<strong class="bold">${children}</strong>`,
  };
  const expected = '<h1 class="heading">Title</h1><p class="body">Hello <strong class="bold">world</strong></p>';

  const html = Markdown.render("# Title\n\nHello **world**\n", callbacks);

  expect(html).toBe(expected);
});
test("stripping all formatting", () => {
  // Every callback except `image` returns its children unchanged, and
  // `image` returns an empty string, so only the plain text should remain.
  const keepChildren = (children: string) => children;
  const noImage = () => "";

  const plain = Markdown.render("# Hello **world**\n", {
    heading: keepChildren,
    paragraph: keepChildren,
    strong: keepChildren,
    emphasis: keepChildren,
    link: keepChildren,
    image: noImage,
    code: keepChildren,
    codespan: keepChildren,
  });

  expect(plain).toBe("Hello world");
});
test("ANSI terminal output", () => {
  // Each callback wraps its children in ANSI escape sequences instead of
  // HTML; heading and paragraph each append a newline.
  const source = "# Hello\n\nThis is **bold** and *italic*\n";
  const callbacks = {
    heading: (children: string) => `\x1b[1;4m${children}\x1b[0m\n`,
    paragraph: (children: string) => children + "\n",
    strong: (children: string) => `\x1b[1m${children}\x1b[22m`,
    emphasis: (children: string) => `\x1b[3m${children}\x1b[23m`,
  };
  const expected = "\x1b[1;4mHello\x1b[0m\nThis is \x1b[1mbold\x1b[22m and \x1b[3mitalic\x1b[23m\n";

  const ansi = Markdown.render(source, callbacks);

  expect(ansi).toBe(expected);
});
test("parser options work alongside callbacks", () => {
  // Parser options are passed as a third argument, after the callbacks.
  // With `autolinks` on, the bare www URL is expected to reach the link callback.
  const callbacks = {
    link: (children: string, { href }: any) => `[${children}](${href})`,
    paragraph: (children: string) => children,
  };
  const parserOptions = { autolinks: true };

  const output = Markdown.render("Visit www.example.com\n", callbacks, parserOptions);

  expect(output).toContain("[www.example.com]");
});
test("headings option provides id in heading meta", () => {
  // With `headings: { ids: true }` as the third argument, the heading
  // callback's metadata is expected to carry a slug-style `id`.
  const callbacks = {
    heading: (children: string, { level, id }: any) => `<h${level} id="${id}">${children}</h${level}>`,
  };
  const parserOptions = { headings: { ids: true } };

  const html = Markdown.render("## Hello World\n", callbacks, parserOptions);

  expect(html).toBe('<h2 id="hello-world">Hello World</h2>');
});
test("table callbacks", () => {
  // Each table callback wraps its children in the HTML tag of the same
  // name. Only a few fragments of the output are checked, not the whole string.
  const wrapIn = (tag: string) => (children: string) => `<${tag}>${children}</${tag}>`;
  const source = "| A | B |\n|---|---|\n| 1 | 2 |\n";

  const html = Markdown.render(source, {
    table: wrapIn("table"),
    thead: wrapIn("thead"),
    tbody: wrapIn("tbody"),
    tr: wrapIn("tr"),
    th: wrapIn("th"),
    td: wrapIn("td"),
  });

  for (const fragment of ["<table>", "<th>A</th>", "<td>1</td>"]) {
    expect(html).toContain(fragment);
  }
});
test("entities are decoded", () => {
  // The `&amp;` entity in the input is expected to reach the paragraph
  // callback as a literal "&".
  const passThrough = (children: string) => children;
  const output = Markdown.render("&amp;\n", { paragraph: passThrough });
  expect(output).toBe("&");
});
});