pub const HtmlRenderer = struct { out: OutputBuffer, allocator: Allocator, src_text: []const u8, image_nesting_level: u32 = 0, saved_img_title: []const u8 = "", tag_filter: bool = false, tag_filter_raw_depth: u32 = 0, autolink_headings: bool = false, heading_buf: std.ArrayListUnmanaged(u8) = .{}, heading_tracker: helpers.HeadingIdTracker = helpers.HeadingIdTracker.init(false), pub const OutputBuffer = struct { list: std.ArrayListUnmanaged(u8), allocator: Allocator, oom: bool, fn write(self: *OutputBuffer, data: []const u8) void { if (self.oom) return; self.list.appendSlice(self.allocator, data) catch { self.oom = true; }; } fn writeByte(self: *OutputBuffer, b: u8) void { if (self.oom) return; self.list.append(self.allocator, b) catch { self.oom = true; }; } }; pub fn init(allocator: Allocator, src_text: []const u8, render_opts: RenderOptions) HtmlRenderer { return .{ .out = .{ .list = .{}, .allocator = allocator, .oom = false }, .allocator = allocator, .src_text = src_text, .tag_filter = render_opts.tag_filter, .autolink_headings = render_opts.autolink_headings, .heading_tracker = helpers.HeadingIdTracker.init(render_opts.heading_ids), }; } pub fn deinit(self: *HtmlRenderer) void { self.out.list.deinit(self.allocator); self.heading_buf.deinit(self.allocator); self.heading_tracker.deinit(self.allocator); } pub fn toOwnedSlice(self: *HtmlRenderer) error{OutOfMemory}![]u8 { if (self.out.oom) return error.OutOfMemory; return self.out.list.toOwnedSlice(self.allocator); } pub fn renderer(self: *HtmlRenderer) Renderer { return .{ .ptr = self, .vtable = &vtable }; } pub const vtable: Renderer.VTable = .{ .enterBlock = enterBlockImpl, .leaveBlock = leaveBlockImpl, .enterSpan = enterSpanImpl, .leaveSpan = leaveSpanImpl, .text = textImpl, }; // ======================================== // VTable implementation functions // ======================================== fn enterBlockImpl(ptr: *anyopaque, block_type: BlockType, data: u32, flags: u32) bun.JSError!void { const self: 
*HtmlRenderer = @ptrCast(@alignCast(ptr)); self.enterBlock(block_type, data, flags); } fn leaveBlockImpl(ptr: *anyopaque, block_type: BlockType, data: u32) bun.JSError!void { const self: *HtmlRenderer = @ptrCast(@alignCast(ptr)); self.leaveBlock(block_type, data); } fn enterSpanImpl(ptr: *anyopaque, span_type: SpanType, detail: SpanDetail) bun.JSError!void { const self: *HtmlRenderer = @ptrCast(@alignCast(ptr)); self.enterSpan(span_type, detail); } fn leaveSpanImpl(ptr: *anyopaque, span_type: SpanType) bun.JSError!void { const self: *HtmlRenderer = @ptrCast(@alignCast(ptr)); self.leaveSpan(span_type); } fn textImpl(ptr: *anyopaque, text_type: TextType, content: []const u8) bun.JSError!void { const self: *HtmlRenderer = @ptrCast(@alignCast(ptr)); self.text(text_type, content); } // ======================================== // Block rendering // ======================================== pub fn enterBlock(self: *HtmlRenderer, block_type: BlockType, data: u32, flags: u32) void { switch (block_type) { .doc => {}, .quote => { self.ensureNewline(); self.write("
\n"); }, .ul => { self.ensureNewline(); self.write("\n"); }, .ol => { self.ensureNewline(); const start = data; if (start == 1) { self.write("
\n"); } else { self.write("
\n"); } }, .li => { const task_mark = types.taskMarkFromData(data); if (task_mark != 0) { self.write("
- "); if (types.isTaskChecked(task_mark)) { self.write(""); } else { self.write(""); } } else { self.write("
- "); } }, .hr => { self.ensureNewline(); self.write("
\n"); }, .h => { self.ensureNewline(); if (self.heading_tracker.enabled) { self.heading_tracker.enterHeading(); } else { const level = data; const tag = switch (level) { 1 => "", 2 => "
", 3 => "
", 4 => "
", 5 => "
", else => "
", }; self.write(tag); } }, .code => { self.ensureNewline(); self.write("
info_beg) { self.write(" class=\"language-"); self.writeWithEntityDecoding(self.src_text[info_beg..lang_end]); self.write("\""); } } self.write(">"); }, .html => self.ensureNewline(), .p => { self.ensureNewline(); self.write(""), .del => self.write(""), .latexmath => self.write(""), .latexmath_display => self.write(""), .wikilink => self.write(""), .img => {}, // handled above } } // ======================================== // Text rendering // ======================================== pub fn text(self: *HtmlRenderer, text_type: TextType, content: []const u8) void { const in_image = self.image_nesting_level > 0; // Track plain text for slug generation when inside a heading self.heading_tracker.trackText(text_type, content, self.allocator); switch (text_type) { .null_char => self.write("\xEF\xBF\xBD"), .br => { if (in_image) self.write(" ") else self.write(""); }, .table => { self.ensureNewline(); self.write("
\n"); }, .thead => self.write("\n"), .tbody => self.write("\n"), .tr => self.write("
\n"), .thead => self.write("\n"), .tbody => self.write("\n"), .tr => self.write("\n"), .th => self.write(""), .td => self.write(""), } } // ======================================== // Span rendering // ======================================== pub fn enterSpan(self: *HtmlRenderer, span_type: SpanType, detail: SpanDetail) void { if (self.image_nesting_level > 0) { if (span_type == .img) self.image_nesting_level += 1; return; } switch (span_type) { .em => self.write(""), .strong => self.write(""), .u => self.write(""), .code => self.write(""), .th, .td => { const tag = if (block_type == .th) " "); }, } } pub fn leaveBlock(self: *HtmlRenderer, block_type: BlockType, data: u32) void { switch (block_type) { .doc => {}, .quote => self.write("\n"), .ul => self.write("\n"), .ol => self.write("\n"), .li => self.write("\n"), .hr => {}, .h => { if (self.heading_tracker.leaveHeading(self.allocator)) |slug| { // Write opening tag with id self.out.write(switch (data) { 1 => " "
\n", 2 => "\n", 3 => "\n", 4 => "\n", 5 => "\n", else => "\n", }); self.heading_buf.clearRetainingCapacity(); self.heading_tracker.clearAfterHeading(); } else { const tag = switch (data) { 1 => "\n", 2 => "\n", 3 => "\n", 4 => "\n", 5 => "\n", else => "\n", }; self.write(tag); } }, .code => self.write("\n"), .html => {}, .p => { self.write("\n"); }, .table => self.write(""
"
"
"
"); if (self.autolink_headings) { self.out.write(""); } // Flush buffered heading content self.out.write(self.heading_buf.items); if (self.autolink_headings) { self.out.write(""); } self.out.write(switch (data) { 1 => "
"), .del => self.write(""), .strong => self.write(""), .u => self.write(""), .a => self.write(""), .code => self.write(""), .latexmath => self.write(""), .latexmath_display => self.write(" "), .a => { self.write(" 0) { self.write(" title=\""); self.writeTitleWithEscapes(detail.title); self.write("\""); } self.write(">"); }, .img => { self.saved_img_title = detail.title; self.write(" { self.write("
"); }, } } pub fn leaveSpan(self: *HtmlRenderer, span_type: SpanType) void { if (self.image_nesting_level > 0) { if (span_type == .img) { self.image_nesting_level -= 1; if (self.image_nesting_level == 0) { self.write("\""); if (self.saved_img_title.len > 0) { self.write(" title=\""); self.writeTitleWithEscapes(self.saved_img_title); self.write("\""); } self.write(" />"); self.saved_img_title = ""; } } return; } switch (span_type) { .em => self.write("
\n"); }, .softbr => { if (in_image) self.write(" ") else self.write("\n"); }, .html => { if (self.tag_filter) { // Track entry/exit of disallowed tag raw zones self.updateTagFilterRawDepth(content); self.writeHtmlWithTagFilter(content); } else { self.write(content); } }, .entity => self.writeEntity(content), .code => { // In code spans, newlines become spaces var start: usize = 0; for (content, 0..) |byte, j| { if (byte == '\n') { if (j > start) self.writeHtmlEscaped(content[start..j]); self.write(" "); start = j + 1; } } if (start < content.len) self.writeHtmlEscaped(content[start..]); }, else => { // When inside a tag-filtered disallowed tag, emit text as raw if (self.tag_filter and self.tag_filter_raw_depth > 0) { self.write(content); } else { self.writeHtmlEscaped(content); } }, } } // ======================================== // HTML writing utilities // ======================================== pub fn write(self: *HtmlRenderer, data: []const u8) void { if (self.heading_tracker.in_heading) { self.heading_buf.appendSlice(self.allocator, data) catch { self.out.oom = true; }; } else { self.out.write(data); } } fn writeByte(self: *HtmlRenderer, b: u8) void { if (self.heading_tracker.in_heading) { self.heading_buf.append(self.allocator, b) catch { self.out.oom = true; }; } else { self.out.writeByte(b); } } /// Track whether we're inside a disallowed tag's raw zone. /// When an opening disallowed tag is seen, increment depth. /// When a closing disallowed tag is seen, decrement depth. 
fn updateTagFilterRawDepth(self: *HtmlRenderer, content: []const u8) void {
    // Only chunks that begin with '<' are inspected; anything else leaves the
    // depth counter untouched.
    if (content.len < 2 or content[0] != '<') return;
    if (!isDisallowedTag(content)) return;

    if (content[1] == '/') {
        // Closing tag: never let the counter underflow on unbalanced input.
        if (self.tag_filter_raw_depth > 0) self.tag_filter_raw_depth -= 1;
    } else if (!std.mem.endsWith(u8, content, "/>")) {
        // Opening tag that is not self-closing.
        self.tag_filter_raw_depth += 1;
    }
}

/// Write HTML content with the GFM tag filter applied. Scans for disallowed
/// tags and replaces their leading `<` with `&lt;`; everything else is
/// written through unchanged.
fn writeHtmlWithTagFilter(self: *HtmlRenderer, content: []const u8) void {
    // `start` marks the beginning of the not-yet-written verbatim run.
    var start: usize = 0;
    for (content, 0..) |byte, i| {
        if (byte == '<' and isDisallowedTag(content[i..])) {
            // Flush everything before this '<', then neutralize it.
            if (i > start) self.write(content[start..i]);
            self.write("&lt;");
            start = i + 1;
        }
    }
    if (start < content.len) self.write(content[start..]);
}

/// Append a '\n' unless the current target buffer is empty or already ends
/// with one. The target is `heading_buf` while a heading is being tracked,
/// otherwise the main output buffer.
fn ensureNewline(self: *HtmlRenderer) void {
    if (self.heading_tracker.in_heading) {
        const items = self.heading_buf.items;
        if (items.len > 0 and items[items.len - 1] != '\n') {
            // Allocation failure is recorded on the output buffer's flag and
            // surfaced later by `toOwnedSlice`.
            self.heading_buf.append(self.allocator, '\n') catch {
                self.out.oom = true;
            };
        }
    } else {
        const items = self.out.list.items;
        if (items.len > 0 and items[items.len - 1] != '\n') {
            self.out.writeByte('\n');
        }
    }
}

/// Write `txt`, replacing `&`, `<`, `>` and `"` with their HTML entities.
/// Runs of unaffected bytes are written in one call each.
pub fn writeHtmlEscaped(self: *HtmlRenderer, txt: []const u8) void {
    const needle = "&<>\"";
    var i: usize = 0;
    while (true) {
        const next = std.mem.indexOfAny(u8, txt[i..], needle) orelse {
            // No more special bytes: flush the tail and stop.
            self.write(txt[i..]);
            return;
        };
        const pos = i + next;
        if (pos > i) self.write(txt[i..pos]);
        switch (txt[pos]) {
            '&' => self.write("&amp;"),
            '<' => self.write("&lt;"),
            '>' => self.write("&gt;"),
            '"' => self.write("&quot;"),
            // indexOfAny only reports bytes contained in `needle`.
            else => unreachable,
        }
        i = pos + 1;
    }
}

/// Write every byte of `txt` through `writeUrlByte`.
fn writeUrlEscaped(self: *HtmlRenderer, txt: []const u8) void {
    for (txt) |byte| {
        self.writeUrlByte(byte);
    }
}

/// Write one byte of a URL attribute value. `&` and `'` become HTML
/// entities, the listed URL-safe ASCII bytes pass through unchanged, and
/// every other byte is percent-encoded with uppercase hex digits.
fn writeUrlByte(self: *HtmlRenderer, byte: u8) void {
    switch (byte) {
        '&' => self.write("&amp;"),
        '\'' => self.write("&#x27;"),
        'A'...'Z',
        'a'...'z',
        '0'...'9',
        '-',
        '.',
        '_',
        '~',
        ':',
        '/',
        '?',
        '#',
        '@',
        '!',
        '$',
        '(',
        ')',
        '*',
        '+',
        ',',
        ';',
        '=',
        '%',
        => self.writeByte(byte),
        else => {
            const encoded = [3]u8{ '%', hexDigit(byte >> 4), hexDigit(byte & 0x0F) };
            self.write(&encoded);
        },
    }
}

/// Write URL with backslash escape and entity processing.
fn writeUrlWithEscapes(self: *HtmlRenderer, txt: []const u8) void {
    var i: usize = 0;
    while (i < txt.len) {
        if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
            // Backslash escape: drop the backslash, emit the escaped byte.
            self.writeUrlByte(txt[i + 1]);
            i += 2;
        } else if (txt[i] == '&') {
            if (findEntityInText(txt, i)) |end_pos| {
                self.writeEntityToUrl(txt[i..end_pos]);
                i = end_pos;
            } else {
                // A lone ampersand must still be escaped inside the attribute.
                self.write("&amp;");
                i += 1;
            }
        } else {
            self.writeUrlByte(txt[i]);
            i += 1;
        }
    }
}

/// Write title attribute with backslash escape and entity processing (HTML-escaped).
fn writeTitleWithEscapes(self: *HtmlRenderer, txt: []const u8) void {
    var i: usize = 0;
    while (i < txt.len) {
        if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
            // Backslash escape: drop the backslash, escape the byte it protected.
            self.writeHtmlEscaped(txt[i + 1 .. i + 2]);
            i += 2;
        } else if (txt[i] == '&') {
            if (findEntityInText(txt, i)) |end_pos| {
                self.writeEntity(txt[i..end_pos]);
                i = end_pos;
            } else {
                self.write("&amp;");
                i += 1;
            }
        } else {
            self.writeHtmlEscaped(txt[i .. i + 1]);
            i += 1;
        }
    }
}

/// Write text with entity and backslash escape decoding, then HTML-escape the result.
/// Used for code fence info strings where entities are recognized.
fn writeWithEntityDecoding(self: *HtmlRenderer, txt: []const u8) void {
    var i: usize = 0;
    while (i < txt.len) {
        if (txt[i] == '&') {
            if (findEntityInText(txt, i)) |end_pos| {
                self.writeEntity(txt[i..end_pos]);
                i = end_pos;
                continue;
            }
            // Not an entity: fall through and escape the '&' as a plain byte.
        } else if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
            self.writeHtmlEscaped(txt[i + 1 .. i + 2]);
            i += 2;
            continue;
        }
        self.writeHtmlEscaped(txt[i .. i + 1]);
        i += 1;
    }
}

/// Decode `entity_text` via `helpers.decodeEntityToUtf8` and write the
/// HTML-escaped result; if decoding yields nothing, write the entity text
/// verbatim.
fn writeEntity(self: *HtmlRenderer, entity_text: []const u8) void {
    var buf: [8]u8 = undefined;
    if (helpers.decodeEntityToUtf8(entity_text, &buf)) |decoded| {
        self.writeHtmlEscaped(decoded);
    } else {
        self.write(entity_text);
    }
}

/// Decode an entity and write its UTF-8 bytes as percent-encoded URL bytes.
/// If decoding yields nothing, the entity text itself is URL-escaped.
fn writeEntityToUrl(self: *HtmlRenderer, entity_text: []const u8) void {
    var buf: [8]u8 = undefined;
    if (helpers.decodeEntityToUtf8(entity_text, &buf)) |decoded| {
        for (decoded) |b| self.writeUrlByte(b);
    } else {
        self.writeUrlEscaped(entity_text);
    }
}

/// Write `value` as base-10 ASCII digits.
fn writeDecimal(self: *HtmlRenderer, value: u32) void {
    if (value == 0) {
        self.writeByte('0');
        return;
    }
    // A u32 has at most 10 decimal digits; fill the buffer from the end.
    var buf: [10]u8 = undefined;
    var i: usize = buf.len;
    var v = value;
    while (v > 0) : (v /= 10) {
        i -= 1;
        buf[i] = @intCast('0' + v % 10);
    }
    self.write(buf[i..]);
}

// ========================================
// Static helpers
// ========================================

/// Map a nibble (0-15) to its uppercase hexadecimal ASCII digit.
fn hexDigit(v: u8) u8 {
    return if (v < 10) '0' + v else 'A' + v - 10;
}

/// GFM 6.11: Check if HTML content starts with a disallowed tag.
/// Disallowed tags have their leading `<` replaced with `&lt;`.
fn isDisallowedTag(content: []const u8) bool {
    // Must start with '<', optionally followed by '/'
    if (content.len < 2 or content[0] != '<') return false;
    const after_lt: usize = if (content[1] == '/') 2 else 1;
    if (after_lt >= content.len) return false;

    const disallowed = [_][]const u8{
        "title",   "textarea", "style",  "xmp",       "iframe",
        "noembed", "noframes", "script", "plaintext",
    };
    inline for (disallowed) |tag| {
        if (matchTagNameCI(content, after_lt, tag)) return true;
    }
    return false;
}

/// Case-insensitive match of tag name at `pos` in `content`.
/// After the name, the next char must be '>', '/', whitespace, or end of string.
fn matchTagNameCI(content: []const u8, pos: usize, tag: []const u8) bool {
    if (pos + tag.len > content.len) return false;
    if (!bun.strings.eqlCaseInsensitiveASCIIIgnoreLength(content[pos..][0..tag.len], tag)) return false;

    // Check delimiter after tag name. Running out of input counts as a match.
    const end = pos + tag.len;
    if (end >= content.len) return true;
    return switch (content[end]) {
        // '\r' and form feed are included alongside space, tab and newline:
        // HTML tokenizers treat all five as whitespace that terminates a tag
        // name, so omitting them would let e.g. "<script\x0Csrc=...>" slip
        // past the tag filter.
        '>', '/', ' ', '\t', '\n', '\r', '\x0C' => true,
        else => false,
    };
}

/// Find an entity in text starting at `start`. Delegates to helpers.findEntity.
fn findEntityInText(content: []const u8, start: usize) ?usize {
    return helpers.findEntity(content, start);
}
};

const bun = @import("bun");
const helpers = @import("./helpers.zig");
const std = @import("std");
const Allocator = std.mem.Allocator;

const root = @import("./root.zig");
const RenderOptions = root.RenderOptions;

const types = @import("./types.zig");
const BlockType = types.BlockType;
const Renderer = types.Renderer;
const SpanDetail = types.SpanDetail;
const SpanType = types.SpanType;
const TextType = types.TextType;