mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
### What does this PR do? I was looking at the [recent support](https://github.com/oven-sh/bun/pull/26440) for markdown and did some benchmarking against [bindings](https://github.com/just-js/lo/blob/main/lib/md4c/api.js) i created for my `lo` runtime to `md4c`. In some cases, Bun is quite a bit slower, so i did a bit of digging and came up with this change. It uses `indexOfAny` which should utilise `SIMD` where it's available to scan ahead in the payload for characters that need escaping. In [benchmarks](https://gist.github.com/billywhizz/397f7929a8920c826c072139b695bb68#file-results-md) I have done this results in anywhere from `3%` to `~15%` improvement in throughput. The bigger the payload and the more space between entities the bigger the gain afaict, which would make sense. ### How did you verify your code works? It passes `test/js/bun/md/*.test.ts` running locally. Only tested on macos. Can test on linux but I assume that will happen in CI anyway? ## main  ## patched 
715 lines
25 KiB
Zig
715 lines
25 KiB
Zig
pub const HtmlRenderer = struct {
|
|
out: OutputBuffer,
|
|
allocator: Allocator,
|
|
src_text: []const u8,
|
|
image_nesting_level: u32 = 0,
|
|
saved_img_title: []const u8 = "",
|
|
tag_filter: bool = false,
|
|
tag_filter_raw_depth: u32 = 0,
|
|
autolink_headings: bool = false,
|
|
heading_buf: std.ArrayListUnmanaged(u8) = .{},
|
|
heading_tracker: helpers.HeadingIdTracker = helpers.HeadingIdTracker.init(false),
|
|
|
|
pub const OutputBuffer = struct {
|
|
list: std.ArrayListUnmanaged(u8),
|
|
allocator: Allocator,
|
|
oom: bool,
|
|
|
|
fn write(self: *OutputBuffer, data: []const u8) void {
|
|
if (self.oom) return;
|
|
self.list.appendSlice(self.allocator, data) catch {
|
|
self.oom = true;
|
|
};
|
|
}
|
|
|
|
fn writeByte(self: *OutputBuffer, b: u8) void {
|
|
if (self.oom) return;
|
|
self.list.append(self.allocator, b) catch {
|
|
self.oom = true;
|
|
};
|
|
}
|
|
};
|
|
|
|
pub fn init(allocator: Allocator, src_text: []const u8, render_opts: RenderOptions) HtmlRenderer {
|
|
return .{
|
|
.out = .{ .list = .{}, .allocator = allocator, .oom = false },
|
|
.allocator = allocator,
|
|
.src_text = src_text,
|
|
.tag_filter = render_opts.tag_filter,
|
|
.autolink_headings = render_opts.autolink_headings,
|
|
.heading_tracker = helpers.HeadingIdTracker.init(render_opts.heading_ids),
|
|
};
|
|
}
|
|
|
|
pub fn deinit(self: *HtmlRenderer) void {
|
|
self.out.list.deinit(self.allocator);
|
|
self.heading_buf.deinit(self.allocator);
|
|
self.heading_tracker.deinit(self.allocator);
|
|
}
|
|
|
|
pub fn toOwnedSlice(self: *HtmlRenderer) error{OutOfMemory}![]u8 {
|
|
if (self.out.oom) return error.OutOfMemory;
|
|
return self.out.list.toOwnedSlice(self.allocator);
|
|
}
|
|
|
|
pub fn renderer(self: *HtmlRenderer) Renderer {
|
|
return .{ .ptr = self, .vtable = &vtable };
|
|
}
|
|
|
|
pub const vtable: Renderer.VTable = .{
|
|
.enterBlock = enterBlockImpl,
|
|
.leaveBlock = leaveBlockImpl,
|
|
.enterSpan = enterSpanImpl,
|
|
.leaveSpan = leaveSpanImpl,
|
|
.text = textImpl,
|
|
};
|
|
|
|
// ========================================
|
|
// VTable implementation functions
|
|
// ========================================
|
|
|
|
fn enterBlockImpl(ptr: *anyopaque, block_type: BlockType, data: u32, flags: u32) bun.JSError!void {
|
|
const self: *HtmlRenderer = @ptrCast(@alignCast(ptr));
|
|
self.enterBlock(block_type, data, flags);
|
|
}
|
|
|
|
fn leaveBlockImpl(ptr: *anyopaque, block_type: BlockType, data: u32) bun.JSError!void {
|
|
const self: *HtmlRenderer = @ptrCast(@alignCast(ptr));
|
|
self.leaveBlock(block_type, data);
|
|
}
|
|
|
|
fn enterSpanImpl(ptr: *anyopaque, span_type: SpanType, detail: SpanDetail) bun.JSError!void {
|
|
const self: *HtmlRenderer = @ptrCast(@alignCast(ptr));
|
|
self.enterSpan(span_type, detail);
|
|
}
|
|
|
|
fn leaveSpanImpl(ptr: *anyopaque, span_type: SpanType) bun.JSError!void {
|
|
const self: *HtmlRenderer = @ptrCast(@alignCast(ptr));
|
|
self.leaveSpan(span_type);
|
|
}
|
|
|
|
fn textImpl(ptr: *anyopaque, text_type: TextType, content: []const u8) bun.JSError!void {
|
|
const self: *HtmlRenderer = @ptrCast(@alignCast(ptr));
|
|
self.text(text_type, content);
|
|
}
|
|
|
|
// ========================================
|
|
// Block rendering
|
|
// ========================================
|
|
|
|
pub fn enterBlock(self: *HtmlRenderer, block_type: BlockType, data: u32, flags: u32) void {
|
|
switch (block_type) {
|
|
.doc => {},
|
|
.quote => {
|
|
self.ensureNewline();
|
|
self.write("<blockquote>\n");
|
|
},
|
|
.ul => {
|
|
self.ensureNewline();
|
|
self.write("<ul>\n");
|
|
},
|
|
.ol => {
|
|
self.ensureNewline();
|
|
const start = data;
|
|
if (start == 1) {
|
|
self.write("<ol>\n");
|
|
} else {
|
|
self.write("<ol start=\"");
|
|
self.writeDecimal(start);
|
|
self.write("\">\n");
|
|
}
|
|
},
|
|
.li => {
|
|
const task_mark = types.taskMarkFromData(data);
|
|
if (task_mark != 0) {
|
|
self.write("<li class=\"task-list-item\">");
|
|
if (types.isTaskChecked(task_mark)) {
|
|
self.write("<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled checked>");
|
|
} else {
|
|
self.write("<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled>");
|
|
}
|
|
} else {
|
|
self.write("<li>");
|
|
}
|
|
},
|
|
.hr => {
|
|
self.ensureNewline();
|
|
self.write("<hr />\n");
|
|
},
|
|
.h => {
|
|
self.ensureNewline();
|
|
if (self.heading_tracker.enabled) {
|
|
self.heading_tracker.enterHeading();
|
|
} else {
|
|
const level = data;
|
|
const tag = switch (level) {
|
|
1 => "<h1>",
|
|
2 => "<h2>",
|
|
3 => "<h3>",
|
|
4 => "<h4>",
|
|
5 => "<h5>",
|
|
else => "<h6>",
|
|
};
|
|
self.write(tag);
|
|
}
|
|
},
|
|
.code => {
|
|
self.ensureNewline();
|
|
self.write("<pre><code");
|
|
if (flags & types.BLOCK_FENCED_CODE != 0) {
|
|
const info_beg = data;
|
|
// Find end of language token (first word of info string)
|
|
var lang_end = info_beg;
|
|
while (lang_end < self.src_text.len and !helpers.isBlank(self.src_text[lang_end]) and
|
|
!helpers.isNewline(self.src_text[lang_end]))
|
|
{
|
|
lang_end += 1;
|
|
}
|
|
if (lang_end > info_beg) {
|
|
self.write(" class=\"language-");
|
|
self.writeWithEntityDecoding(self.src_text[info_beg..lang_end]);
|
|
self.write("\"");
|
|
}
|
|
}
|
|
self.write(">");
|
|
},
|
|
.html => self.ensureNewline(),
|
|
.p => {
|
|
self.ensureNewline();
|
|
self.write("<p>");
|
|
},
|
|
.table => {
|
|
self.ensureNewline();
|
|
self.write("<table>\n");
|
|
},
|
|
.thead => self.write("<thead>\n"),
|
|
.tbody => self.write("<tbody>\n"),
|
|
.tr => self.write("<tr>"),
|
|
.th, .td => {
|
|
const tag = if (block_type == .th) "<th" else "<td";
|
|
self.write(tag);
|
|
const alignment = types.alignmentFromData(data);
|
|
if (types.alignmentName(alignment)) |name| {
|
|
self.write(" align=\"");
|
|
self.write(name);
|
|
self.write("\"");
|
|
}
|
|
self.write(">");
|
|
},
|
|
}
|
|
}
|
|
|
|
pub fn leaveBlock(self: *HtmlRenderer, block_type: BlockType, data: u32) void {
|
|
switch (block_type) {
|
|
.doc => {},
|
|
.quote => self.write("</blockquote>\n"),
|
|
.ul => self.write("</ul>\n"),
|
|
.ol => self.write("</ol>\n"),
|
|
.li => self.write("</li>\n"),
|
|
.hr => {},
|
|
.h => {
|
|
if (self.heading_tracker.leaveHeading(self.allocator)) |slug| {
|
|
// Write opening tag with id
|
|
self.out.write(switch (data) {
|
|
1 => "<h1",
|
|
2 => "<h2",
|
|
3 => "<h3",
|
|
4 => "<h4",
|
|
5 => "<h5",
|
|
else => "<h6",
|
|
});
|
|
self.out.write(" id=\"");
|
|
self.out.write(slug);
|
|
self.out.write("\">");
|
|
if (self.autolink_headings) {
|
|
self.out.write("<a href=\"#");
|
|
self.out.write(slug);
|
|
self.out.write("\">");
|
|
}
|
|
// Flush buffered heading content
|
|
self.out.write(self.heading_buf.items);
|
|
if (self.autolink_headings) {
|
|
self.out.write("</a>");
|
|
}
|
|
self.out.write(switch (data) {
|
|
1 => "</h1>\n",
|
|
2 => "</h2>\n",
|
|
3 => "</h3>\n",
|
|
4 => "</h4>\n",
|
|
5 => "</h5>\n",
|
|
else => "</h6>\n",
|
|
});
|
|
self.heading_buf.clearRetainingCapacity();
|
|
self.heading_tracker.clearAfterHeading();
|
|
} else {
|
|
const tag = switch (data) {
|
|
1 => "</h1>\n",
|
|
2 => "</h2>\n",
|
|
3 => "</h3>\n",
|
|
4 => "</h4>\n",
|
|
5 => "</h5>\n",
|
|
else => "</h6>\n",
|
|
};
|
|
self.write(tag);
|
|
}
|
|
},
|
|
.code => self.write("</code></pre>\n"),
|
|
.html => {},
|
|
.p => {
|
|
self.write("</p>\n");
|
|
},
|
|
.table => self.write("</table>\n"),
|
|
.thead => self.write("</thead>\n"),
|
|
.tbody => self.write("</tbody>\n"),
|
|
.tr => self.write("</tr>\n"),
|
|
.th => self.write("</th>"),
|
|
.td => self.write("</td>"),
|
|
}
|
|
}
|
|
|
|
// ========================================
|
|
// Span rendering
|
|
// ========================================
|
|
|
|
pub fn enterSpan(self: *HtmlRenderer, span_type: SpanType, detail: SpanDetail) void {
|
|
if (self.image_nesting_level > 0) {
|
|
if (span_type == .img) self.image_nesting_level += 1;
|
|
return;
|
|
}
|
|
|
|
switch (span_type) {
|
|
.em => self.write("<em>"),
|
|
.strong => self.write("<strong>"),
|
|
.u => self.write("<u>"),
|
|
.code => self.write("<code>"),
|
|
.del => self.write("<del>"),
|
|
.latexmath => self.write("<x-equation>"),
|
|
.latexmath_display => self.write("<x-equation type=\"display\">"),
|
|
.a => {
|
|
self.write("<a href=\"");
|
|
if (detail.permissive_autolink) {
|
|
// Permissive autolinks use HTML-escaping for href
|
|
if (detail.autolink_email) self.write("mailto:");
|
|
if (detail.autolink_www) self.write("http://");
|
|
self.writeHtmlEscaped(detail.href);
|
|
} else if (detail.autolink) {
|
|
// Standard autolinks: percent-encode only, no entity/escape processing
|
|
if (detail.autolink_email) self.write("mailto:");
|
|
self.writeUrlEscaped(detail.href);
|
|
} else {
|
|
// Regular links: full entity/escape processing
|
|
if (detail.autolink_email) self.write("mailto:");
|
|
self.writeUrlWithEscapes(detail.href);
|
|
}
|
|
self.write("\"");
|
|
if (detail.title.len > 0) {
|
|
self.write(" title=\"");
|
|
self.writeTitleWithEscapes(detail.title);
|
|
self.write("\"");
|
|
}
|
|
self.write(">");
|
|
},
|
|
.img => {
|
|
self.saved_img_title = detail.title;
|
|
self.write("<img src=\"");
|
|
self.writeUrlWithEscapes(detail.href);
|
|
self.write("\" alt=\"");
|
|
self.image_nesting_level += 1;
|
|
},
|
|
.wikilink => {
|
|
self.write("<x-wikilink data-target=\"");
|
|
self.writeHtmlEscaped(detail.href);
|
|
self.write("\">");
|
|
},
|
|
}
|
|
}
|
|
|
|
pub fn leaveSpan(self: *HtmlRenderer, span_type: SpanType) void {
|
|
if (self.image_nesting_level > 0) {
|
|
if (span_type == .img) {
|
|
self.image_nesting_level -= 1;
|
|
if (self.image_nesting_level == 0) {
|
|
self.write("\"");
|
|
if (self.saved_img_title.len > 0) {
|
|
self.write(" title=\"");
|
|
self.writeTitleWithEscapes(self.saved_img_title);
|
|
self.write("\"");
|
|
}
|
|
self.write(" />");
|
|
self.saved_img_title = "";
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
switch (span_type) {
|
|
.em => self.write("</em>"),
|
|
.strong => self.write("</strong>"),
|
|
.u => self.write("</u>"),
|
|
.a => self.write("</a>"),
|
|
.code => self.write("</code>"),
|
|
.del => self.write("</del>"),
|
|
.latexmath => self.write("</x-equation>"),
|
|
.latexmath_display => self.write("</x-equation>"),
|
|
.wikilink => self.write("</x-wikilink>"),
|
|
.img => {}, // handled above
|
|
}
|
|
}
|
|
|
|
// ========================================
|
|
// Text rendering
|
|
// ========================================
|
|
|
|
pub fn text(self: *HtmlRenderer, text_type: TextType, content: []const u8) void {
|
|
const in_image = self.image_nesting_level > 0;
|
|
|
|
// Track plain text for slug generation when inside a heading
|
|
self.heading_tracker.trackText(text_type, content, self.allocator);
|
|
|
|
switch (text_type) {
|
|
.null_char => self.write("\xEF\xBF\xBD"),
|
|
.br => {
|
|
if (in_image) self.write(" ") else self.write("<br />\n");
|
|
},
|
|
.softbr => {
|
|
if (in_image) self.write(" ") else self.write("\n");
|
|
},
|
|
.html => {
|
|
if (self.tag_filter) {
|
|
// Track entry/exit of disallowed tag raw zones
|
|
self.updateTagFilterRawDepth(content);
|
|
self.writeHtmlWithTagFilter(content);
|
|
} else {
|
|
self.write(content);
|
|
}
|
|
},
|
|
.entity => self.writeEntity(content),
|
|
.code => {
|
|
// In code spans, newlines become spaces
|
|
var start: usize = 0;
|
|
for (content, 0..) |byte, j| {
|
|
if (byte == '\n') {
|
|
if (j > start) self.writeHtmlEscaped(content[start..j]);
|
|
self.write(" ");
|
|
start = j + 1;
|
|
}
|
|
}
|
|
if (start < content.len) self.writeHtmlEscaped(content[start..]);
|
|
},
|
|
else => {
|
|
// When inside a tag-filtered disallowed tag, emit text as raw
|
|
if (self.tag_filter and self.tag_filter_raw_depth > 0) {
|
|
self.write(content);
|
|
} else {
|
|
self.writeHtmlEscaped(content);
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
// ========================================
|
|
// HTML writing utilities
|
|
// ========================================
|
|
|
|
pub fn write(self: *HtmlRenderer, data: []const u8) void {
|
|
if (self.heading_tracker.in_heading) {
|
|
self.heading_buf.appendSlice(self.allocator, data) catch {
|
|
self.out.oom = true;
|
|
};
|
|
} else {
|
|
self.out.write(data);
|
|
}
|
|
}
|
|
|
|
fn writeByte(self: *HtmlRenderer, b: u8) void {
|
|
if (self.heading_tracker.in_heading) {
|
|
self.heading_buf.append(self.allocator, b) catch {
|
|
self.out.oom = true;
|
|
};
|
|
} else {
|
|
self.out.writeByte(b);
|
|
}
|
|
}
|
|
|
|
/// Track whether we're inside a disallowed tag's raw zone.
|
|
/// When an opening disallowed tag is seen, increment depth.
|
|
/// When a closing disallowed tag is seen, decrement depth.
|
|
fn updateTagFilterRawDepth(self: *HtmlRenderer, content: []const u8) void {
|
|
if (content.len < 2 or content[0] != '<') return;
|
|
if (content[1] == '/') {
|
|
// Closing tag
|
|
if (isDisallowedTag(content) and self.tag_filter_raw_depth > 0) {
|
|
self.tag_filter_raw_depth -= 1;
|
|
}
|
|
} else {
|
|
// Opening tag (not self-closing)
|
|
if (isDisallowedTag(content)) {
|
|
// Check if NOT self-closing (doesn't end with "/>")
|
|
if (content[content.len - 2] != '/' or content[content.len - 1] != '>') {
|
|
self.tag_filter_raw_depth += 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write HTML content with GFM tag filter applied. Scans for disallowed
|
|
/// tags and replaces their leading `<` with `<`.
|
|
fn writeHtmlWithTagFilter(self: *HtmlRenderer, content: []const u8) void {
|
|
var start: usize = 0;
|
|
var i: usize = 0;
|
|
while (i < content.len) {
|
|
if (content[i] == '<' and isDisallowedTag(content[i..])) {
|
|
// Write everything before this '<'
|
|
if (i > start) self.write(content[start..i]);
|
|
self.write("<");
|
|
start = i + 1;
|
|
}
|
|
i += 1;
|
|
}
|
|
if (start < content.len) self.write(content[start..]);
|
|
}
|
|
|
|
fn ensureNewline(self: *HtmlRenderer) void {
|
|
if (self.heading_tracker.in_heading) {
|
|
const items = self.heading_buf.items;
|
|
if (items.len > 0 and items[items.len - 1] != '\n') {
|
|
self.heading_buf.append(self.allocator, '\n') catch {
|
|
self.out.oom = true;
|
|
};
|
|
}
|
|
} else {
|
|
const items = self.out.list.items;
|
|
if (items.len > 0 and items[items.len - 1] != '\n') {
|
|
self.out.writeByte('\n');
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn writeHtmlEscaped(self: *HtmlRenderer, txt: []const u8) void {
|
|
var i: usize = 0;
|
|
const needle = "&<>\"";
|
|
|
|
while (true) {
|
|
const next = std.mem.indexOfAny(u8, txt[i..], needle) orelse {
|
|
self.write(txt[i..]);
|
|
return;
|
|
};
|
|
const pos = i + next;
|
|
if (pos > i)
|
|
self.write(txt[i..pos]);
|
|
const c = txt[pos];
|
|
switch (c) {
|
|
'&' => self.write("&"),
|
|
'<' => self.write("<"),
|
|
'>' => self.write(">"),
|
|
'"' => self.write("""),
|
|
else => unreachable,
|
|
}
|
|
i = pos + 1;
|
|
}
|
|
}
|
|
|
|
fn writeUrlEscaped(self: *HtmlRenderer, txt: []const u8) void {
|
|
for (txt) |byte| {
|
|
self.writeUrlByte(byte);
|
|
}
|
|
}
|
|
|
|
fn writeUrlByte(self: *HtmlRenderer, byte: u8) void {
|
|
switch (byte) {
|
|
'&' => self.write("&"),
|
|
'\'' => self.write("'"),
|
|
'A'...'Z',
|
|
'a'...'z',
|
|
'0'...'9',
|
|
'-',
|
|
'.',
|
|
'_',
|
|
'~',
|
|
':',
|
|
'/',
|
|
'?',
|
|
'#',
|
|
'@',
|
|
'!',
|
|
'$',
|
|
'(',
|
|
')',
|
|
'*',
|
|
'+',
|
|
',',
|
|
';',
|
|
'=',
|
|
'%',
|
|
=> self.writeByte(byte),
|
|
else => {
|
|
var buf: [3]u8 = undefined;
|
|
buf[0] = '%';
|
|
buf[1] = hexDigit(byte >> 4);
|
|
buf[2] = hexDigit(byte & 0x0F);
|
|
self.write(&buf);
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Write URL with backslash escape and entity processing.
|
|
fn writeUrlWithEscapes(self: *HtmlRenderer, txt: []const u8) void {
|
|
var i: usize = 0;
|
|
while (i < txt.len) {
|
|
if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
|
|
self.writeUrlByte(txt[i + 1]);
|
|
i += 2;
|
|
} else if (txt[i] == '&') {
|
|
if (findEntityInText(txt, i)) |end_pos| {
|
|
self.writeEntityToUrl(txt[i..end_pos]);
|
|
i = end_pos;
|
|
} else {
|
|
self.write("&");
|
|
i += 1;
|
|
}
|
|
} else {
|
|
self.writeUrlByte(txt[i]);
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write title attribute with backslash escape and entity processing (HTML-escaped).
|
|
fn writeTitleWithEscapes(self: *HtmlRenderer, txt: []const u8) void {
|
|
var i: usize = 0;
|
|
while (i < txt.len) {
|
|
if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
|
|
self.writeHtmlEscaped(txt[i + 1 .. i + 2]);
|
|
i += 2;
|
|
} else if (txt[i] == '&') {
|
|
if (findEntityInText(txt, i)) |end_pos| {
|
|
self.writeEntity(txt[i..end_pos]);
|
|
i = end_pos;
|
|
} else {
|
|
self.write("&");
|
|
i += 1;
|
|
}
|
|
} else {
|
|
self.writeHtmlEscaped(txt[i .. i + 1]);
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write text with entity and backslash escape decoding, then HTML-escape the result.
|
|
/// Used for code fence info strings where entities are recognized.
|
|
fn writeWithEntityDecoding(self: *HtmlRenderer, txt: []const u8) void {
|
|
var i: usize = 0;
|
|
while (i < txt.len) {
|
|
if (txt[i] == '&') {
|
|
if (findEntityInText(txt, i)) |end_pos| {
|
|
self.writeEntity(txt[i..end_pos]);
|
|
i = end_pos;
|
|
continue;
|
|
}
|
|
} else if (txt[i] == '\\' and i + 1 < txt.len and helpers.isAsciiPunctuation(txt[i + 1])) {
|
|
self.writeHtmlEscaped(txt[i + 1 .. i + 2]);
|
|
i += 2;
|
|
continue;
|
|
}
|
|
self.writeHtmlEscaped(txt[i .. i + 1]);
|
|
i += 1;
|
|
}
|
|
}
|
|
|
|
fn writeEntity(self: *HtmlRenderer, entity_text: []const u8) void {
|
|
var buf: [8]u8 = undefined;
|
|
if (helpers.decodeEntityToUtf8(entity_text, &buf)) |decoded| {
|
|
self.writeHtmlEscaped(decoded);
|
|
} else {
|
|
self.write(entity_text);
|
|
}
|
|
}
|
|
|
|
/// Decode an entity and write its UTF-8 bytes as percent-encoded URL bytes.
|
|
fn writeEntityToUrl(self: *HtmlRenderer, entity_text: []const u8) void {
|
|
var buf: [8]u8 = undefined;
|
|
if (helpers.decodeEntityToUtf8(entity_text, &buf)) |decoded| {
|
|
for (decoded) |b| self.writeUrlByte(b);
|
|
} else {
|
|
self.writeUrlEscaped(entity_text);
|
|
}
|
|
}
|
|
|
|
fn writeDecimal(self: *HtmlRenderer, value: u32) void {
|
|
var buf: [10]u8 = undefined;
|
|
var v = value;
|
|
var i: usize = buf.len;
|
|
if (v == 0) {
|
|
self.writeByte('0');
|
|
return;
|
|
}
|
|
while (v > 0) {
|
|
i -= 1;
|
|
buf[i] = @intCast('0' + v % 10);
|
|
v /= 10;
|
|
}
|
|
self.write(buf[i..]);
|
|
}
|
|
|
|
// ========================================
|
|
// Static helpers
|
|
// ========================================
|
|
|
|
fn hexDigit(v: u8) u8 {
|
|
return if (v < 10) '0' + v else 'A' + v - 10;
|
|
}
|
|
|
|
/// GFM 6.11: Check if HTML content starts with a disallowed tag.
|
|
/// Disallowed tags have their leading `<` replaced with `<`.
|
|
fn isDisallowedTag(content: []const u8) bool {
|
|
// Must start with '<', optionally followed by '/'
|
|
if (content.len < 2 or content[0] != '<') return false;
|
|
const after_lt: usize = if (content[1] == '/') 2 else 1;
|
|
if (after_lt >= content.len) return false;
|
|
|
|
const disallowed = [_][]const u8{
|
|
"title", "textarea", "style", "xmp", "iframe",
|
|
"noembed", "noframes", "script", "plaintext",
|
|
};
|
|
inline for (disallowed) |tag| {
|
|
if (matchTagNameCI(content, after_lt, tag)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Case-insensitive match of tag name at `pos` in `content`.
|
|
/// After the name, the next char must be '>', '/', whitespace, or end of string.
|
|
fn matchTagNameCI(content: []const u8, pos: usize, tag: []const u8) bool {
|
|
if (pos + tag.len > content.len) return false;
|
|
if (!bun.strings.eqlCaseInsensitiveASCIIIgnoreLength(content[pos..][0..tag.len], tag)) return false;
|
|
// Check delimiter after tag name
|
|
const end = pos + tag.len;
|
|
if (end >= content.len) return true;
|
|
return switch (content[end]) {
|
|
'>', ' ', '\t', '\n', '/' => true,
|
|
else => false,
|
|
};
|
|
}
|
|
|
|
/// Find an entity in text starting at `start`. Delegates to helpers.findEntity.
|
|
fn findEntityInText(content: []const u8, start: usize) ?usize {
|
|
return helpers.findEntity(content, start);
|
|
}
|
|
};
|
|
|
|
const bun = @import("bun");
|
|
const helpers = @import("./helpers.zig");
|
|
const std = @import("std");
|
|
const Allocator = std.mem.Allocator;
|
|
|
|
const root = @import("./root.zig");
|
|
const RenderOptions = root.RenderOptions;
|
|
|
|
const types = @import("./types.zig");
|
|
const BlockType = types.BlockType;
|
|
const Renderer = types.Renderer;
|
|
const SpanDetail = types.SpanDetail;
|
|
const SpanType = types.SpanType;
|
|
const TextType = types.TextType;
|