Compare commits

...

2 Commits

Author SHA1 Message Date
Alistair Smith
048ca56e6c Merge branch 'main' into claude/fix-markdown-frontmatter-26605 2026-01-30 11:27:28 -08:00
Claude Bot
600577415a fix(md): skip YAML frontmatter in Bun.markdown.html()
When a Markdown file has YAML frontmatter (text between `---` markers
at the top), `Bun.markdown.html()` now correctly skips it instead of
rendering it as HTML content.

Previously, the opening `---` was rendered as `<hr />`, and the YAML
content followed by the closing `---` was interpreted as a setext-style
heading, producing incorrect output like:

    <hr />
    <h2>title: "Hello World"
    author: "Bun"</h2>

Now the frontmatter is properly detected and excluded:

    <h1>Heading</h1>

The detection requires at least one `:` character in the frontmatter
content to distinguish valid YAML from setext headings like:

    ---
    Foo
    ---

A new `frontmatter` option (default: true) allows disabling this
behavior when needed.

Fixes #26605

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 17:56:22 +00:00
3 changed files with 255 additions and 2 deletions

View File

@@ -162,6 +162,80 @@ pub fn skipUtf8Bom(text: []const u8) []const u8 {
return text;
}
/// Skip YAML frontmatter if present at the start of the text.
///
/// Frontmatter starts with `---` on its own line at the very beginning of the
/// document (a leading UTF-8 byte order mark is ignored for this purpose) and
/// ends with `---` or `...` on its own line. Delimiter lines may carry trailing
/// spaces or tabs. Lines may be terminated by `\n`, `\r\n`, or a lone `\r`.
///
/// The content between the markers must contain at least one `:` to be
/// considered YAML; this prevents false positives with setext headings such as
/// "---\nFoo\n---".
///
/// Returns the slice of `text` that follows the closing delimiter line. When no
/// frontmatter is recognized (no opener, no closer, or no `:` in the content)
/// the original `text` is returned unchanged, including any byte order mark.
pub fn skipFrontmatter(text: []const u8) []const u8 {
    // A byte order mark is not document content, so the opener is still
    // "at the very beginning" when it directly follows one.
    const has_bom = text.len >= 3 and text[0] == 0xEF and text[1] == 0xBB and text[2] == 0xBF;
    const bom_len: usize = if (has_bom) 3 else 0;

    // The opener must be `---`; `...` only ever closes a block.
    const opener_end = frontmatterDelimiterEnd(text, bom_len, false) orelse return text;
    var pos = frontmatterSkipLineBreak(text, opener_end);
    const content_start = pos;

    // `pos` always sits at the start of a line here.
    while (pos < text.len) {
        if (frontmatterDelimiterEnd(text, pos, true)) |closer_end| {
            // Only the first closer is considered: if the block it closes has
            // no `:`, the document is left entirely to the Markdown parser.
            for (text[content_start..pos]) |c| {
                if (c == ':') return text[frontmatterSkipLineBreak(text, closer_end)..];
            }
            return text;
        }

        // Advance to the next line. Stopping on `\r` as well as `\n` keeps
        // this loop consistent with the delimiter checks, which already accept
        // a lone `\r` as a line terminator.
        while (pos < text.len and text[pos] != '\n' and text[pos] != '\r') {
            pos += 1;
        }
        pos = frontmatterSkipLineBreak(text, pos);
    }

    // No closing delimiter found: treat as no frontmatter.
    return text;
}

/// If the line starting at `start` is a frontmatter delimiter, return the index
/// just past the marker and its trailing spaces/tabs (which is either a line
/// break or the end of `text`); otherwise return null.
/// `---` is always accepted; `...` is accepted only when `allow_dots` is true.
/// Requires `start <= text.len`.
fn frontmatterDelimiterEnd(text: []const u8, start: usize, allow_dots: bool) ?usize {
    if (text.len - start < 3) return null;

    const marker = text[start];
    if (marker != '-' and !(allow_dots and marker == '.')) return null;
    if (text[start + 1] != marker or text[start + 2] != marker) return null;

    var end = start + 3;
    while (end < text.len and (text[end] == ' ' or text[end] == '\t')) {
        end += 1;
    }
    // Anything other than a line break after the blanks (e.g. "---text" or
    // "----") means this is not a delimiter line.
    if (end < text.len and text[end] != '\n' and text[end] != '\r') return null;
    return end;
}

/// Return the index after one line break (`\r\n`, `\r`, or `\n`) at `pos`,
/// or `pos` itself when there is no line break there.
fn frontmatterSkipLineBreak(text: []const u8, pos: usize) usize {
    var next = pos;
    if (next < text.len and text[next] == '\r') next += 1;
    if (next < text.len and text[next] == '\n') next += 1;
    return next;
}
/// Case-insensitive ASCII comparison.
pub fn asciiCaseEql(a: []const u8, b: []const u8) bool {
return bun.strings.eqlCaseInsensitiveASCIIICheckLength(a, b);

View File

@@ -35,11 +35,14 @@ pub const Options = struct {
tag_filter: bool = false,
heading_ids: bool = false,
autolink_headings: bool = false,
/// Skip YAML frontmatter at the start of the document (text between `---` markers).
frontmatter: bool = true,
/// Preset that turns off tables, strikethrough, task lists, and frontmatter
/// skipping; every other field keeps its declared default.
pub const commonmark: Options = .{
.tables = false,
.strikethrough = false,
.tasklists = false,
.frontmatter = false,
};
pub const github: Options = .{
@@ -50,6 +53,7 @@ pub const Options = struct {
.permissive_www_autolinks = true,
.permissive_email_autolinks = true,
.tag_filter = true,
.frontmatter = true,
};
pub fn toFlags(self: Options) Flags {
@@ -86,12 +90,14 @@ pub fn renderToHtml(text: []const u8, allocator: std.mem.Allocator) parser.Parse
}
/// Render `text` to HTML using the given `options`.
/// When `options.frontmatter` is set, the input is first passed through
/// `helpers.skipFrontmatter` so a leading YAML frontmatter block is not
/// rendered; otherwise the text is handed to the parser untouched.
pub fn renderToHtmlWithOptions(text: []const u8, allocator: std.mem.Allocator, options: Options) parser.Parser.Error![]u8 {
    const input = if (options.frontmatter) helpers.skipFrontmatter(text) else text;
    return parser.renderToHtml(input, allocator, options.toFlags(), options.toRenderOptions());
}
/// Parse and render using a custom renderer implementation.
/// When `options.frontmatter` is set, the input is first passed through
/// `helpers.skipFrontmatter` so a leading YAML frontmatter block never reaches
/// `renderer`; otherwise the text is handed to the parser untouched.
pub fn renderWithRenderer(text: []const u8, allocator: std.mem.Allocator, options: Options, renderer: Renderer) parser.Parser.Error!void {
    const input = if (options.frontmatter) helpers.skipFrontmatter(text) else text;
    return parser.renderWithRenderer(input, allocator, options.toFlags(), options.toRenderOptions(), renderer);
}
pub const types = @import("./types.zig");

View File

@@ -0,0 +1,173 @@
import { expect, test } from "bun:test";
// Baseline case: a `---` ... `---` block with `key: value` lines at the very
// start of the document is dropped and only the Markdown after it is rendered.
test("Bun.markdown.html() should skip YAML frontmatter", () => {
const md = `---
title: "Hello World"
author: "Bun"
---
# Heading
`;
const html = Bun.markdown.html(md);
// Should NOT contain frontmatter content
expect(html).not.toContain("title:");
expect(html).not.toContain("Hello World");
expect(html).not.toContain("author:");
// Neither delimiter may leak through as a thematic break or setext heading
expect(html).not.toContain("<hr");
expect(html).not.toContain("<h2>");
// Should contain the actual heading
expect(html).toContain("<h1>Heading</h1>");
});
// `...` is accepted as an alternative closing delimiter.
test("Bun.markdown.html() should handle frontmatter with ... closing delimiter", () => {
const md = `---
title: Test
...
# Content
`;
const html = Bun.markdown.html(md);
expect(html).not.toContain("title:");
expect(html).toContain("<h1>Content</h1>");
});
// Smallest block that still qualifies: a single line containing a colon.
test("Bun.markdown.html() should handle minimal frontmatter", () => {
const md = `---
a: b
---
# Heading
`;
const html = Bun.markdown.html(md);
expect(html).not.toContain("a:");
expect(html).toContain("<h1>Heading</h1>");
});
test("Bun.markdown.html() should not treat empty --- blocks as frontmatter", () => {
// An empty block contains no colon, so it is not recognized as frontmatter
// and both `---` lines are left for the Markdown parser to handle.
const md = `---
---
# Heading
`;
const html = Bun.markdown.html(md);
// Without a colon inside, this is not frontmatter - it's an hr followed by an hr
expect(html).toContain("<hr");
expect(html).toContain("<h1>Heading</h1>");
});
// Frontmatter is only recognized when `---` is the first thing in the document;
// here it follows a heading, so it must keep its normal Markdown meaning.
test("Bun.markdown.html() should not treat --- as frontmatter when not at document start", () => {
const md = `# Heading
---
Some text
`;
const html = Bun.markdown.html(md);
// The --- should be treated as a thematic break (hr)
expect(html).toContain("<hr");
expect(html).toContain("<h1>Heading</h1>");
});
test("Bun.markdown.html() should handle frontmatter with spaces after delimiters", () => {
  // The trailing blanks are spelled out with explicit escapes instead of living
  // invisibly at the end of template-literal lines, where editors and formatters
  // strip them and silently turn this into a duplicate of the basic test.
  // Opener is followed by a space; closer is followed by a tab and a space.
  const md = ["--- ", "title: Test", "---\t ", "# Content", ""].join("\n");
  const html = Bun.markdown.html(md);
  expect(html).not.toContain("title:");
  expect(html).not.toContain("<hr");
  expect(html).toContain("<h1>Content</h1>");
});
// Opt-out path: passing `frontmatter: false` leaves the block to the Markdown parser.
test("Bun.markdown.html() should not skip frontmatter when disabled", () => {
const md = `---
title: "Hello"
---
# Heading
`;
const html = Bun.markdown.html(md, { frontmatter: false });
// With frontmatter disabled, --- becomes hr and title becomes h2
expect(html).toContain("<hr");
});
// Nothing follows the closing delimiter, so nothing is left to render.
test("Bun.markdown.html() should handle document with only frontmatter", () => {
const md = `---
title: Test
---
`;
const html = Bun.markdown.html(md);
// Should result in empty/minimal output
expect(html.trim()).toBe("");
});
// Multi-line YAML with a list and a nested mapping is skipped as a whole.
// NOTE(review): the list items and nested key carry no leading indentation in
// this fixture - confirm that matches the intended YAML.
test("Bun.markdown.html() should handle frontmatter with complex YAML content", () => {
const md = `---
title: "Test Document"
tags:
- javascript
- markdown
date: 2024-01-15
nested:
key: value
---
# Main Content
Paragraph text.
`;
const html = Bun.markdown.html(md);
expect(html).not.toContain("tags:");
expect(html).not.toContain("javascript");
expect(html).not.toContain("nested:");
expect(html).toContain("<h1>Main Content</h1>");
expect(html).toContain("<p>Paragraph text.</p>");
});
// Only the block at the very start of the document is stripped; a later
// `---` pair inside a fenced code block must survive as code content.
test("Bun.markdown.html() should handle frontmatter with --- inside code blocks", () => {
const md = `---
title: Test
---
# Heading
\`\`\`yaml
---
inner: frontmatter
---
\`\`\`
`;
const html = Bun.markdown.html(md);
expect(html).not.toContain("title: Test");
expect(html).toContain("<h1>Heading</h1>");
// The code block should contain the --- markers
expect(html).toContain("inner: frontmatter");
});
// An opener with no matching `---` / `...` line is not frontmatter.
test("Bun.markdown.html() handles unclosed frontmatter by treating it as regular content", () => {
const md = `---
title: Test
# Heading
`;
const html = Bun.markdown.html(md);
// Without a closing ---, the document should be treated normally
// The --- becomes an hr, and title: becomes content
expect(html).toContain("<hr");
});