import { describe, expect, test } from "bun:test";
import { readFileSync } from "fs";
import { join } from "path";

const SPEC_DIR = import.meta.dir;

/** One input/expected-output example extracted from a spec file. */
interface SpecExample {
  /** Markdown source of the example. */
  markdown: string;
  /** HTML the spec file lists for `markdown`, with any flag trailer removed. */
  expected: string;
  /** 1-based line number of the example's opening fence. */
  line: number;
  /** Text of the most recent `#`, `##` or `###` heading seen before the example. */
  section: string;
  /** Tokens starting with `--f` that were listed after the expected HTML. */
  flags: string[];
}

/**
 * Reads a spec file and extracts every example block from it.
 *
 * An example starts at a line beginning with 32 backticks followed by
 * " example", contains the markdown input up to a lone "." line, then the
 * expected HTML up to the next line beginning with the 32-backtick fence.
 * The "→" character (U+2192) stands for a tab in both parts.
 *
 * @param path Path of the spec file to read.
 * @returns The examples in file order.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function parseSpecFile(path: string): SpecExample[] {
  const content = readFileSync(path, "utf8").replace(/\r\n?/g, "\n");
  const lines = content.split("\n");
  const examples: SpecExample[] = [];
  const fence = "`".repeat(32);
  let i = 0;
  let currentSection = "";

  while (i < lines.length) {
    const line = lines[i];

    // Track section headers
    if (line.startsWith("# ") || line.startsWith("## ") || line.startsWith("### ")) {
      currentSection = line.replace(/^#+\s*/, "");
    }

    if (line.startsWith(fence + " example")) {
      const startLine = i + 1;
      i++;

      // Collect markdown input (until lone "." line)
      const mdLines: string[] = [];
      while (i < lines.length && lines[i] !== ".") {
        mdLines.push(lines[i]);
        i++;
      }
      i++; // skip the "."

      // Collect expected HTML (until closing fence)
      const htmlLines: string[] = [];
      while (i < lines.length && !lines[i].startsWith(fence)) {
        htmlLines.push(lines[i]);
        i++;
      }

      // Extension spec files have a second "." followed by flags (e.g. "--ftables").
      // Strip trailing ".\n--fXXX\n--fYYY\n..." from expected HTML and save flags.
      // The separator may be the very first line when the expected HTML is
      // empty, hence "(?:^|\n)" rather than a mandatory leading newline.
      let expectedHtml = htmlLines.join("\n");
      let flags: string[] = [];
      const flagMatch = expectedHtml.match(/(?:^|\n)\.\n((?:--[^\n]+\n?)+)$/);
      if (flagMatch) {
        expectedHtml = expectedHtml.slice(0, -flagMatch[0].length);
        flags = flagMatch[1]
          .trim()
          .split("\n")
          .flatMap((flagLine: string) => flagLine.split(/\s+/))
          .filter((f: string) => f.startsWith("--f"));
      }

      examples.push({
        markdown: mdLines.join("\n").replaceAll("\u2192", "\t"),
        expected: expectedHtml.replaceAll("\u2192", "\t"),
        line: startLine,
        section: currentSection,
        flags,
      });
    }
    i++;
  }

  return examples;
}

const markdown = Bun.markdown;

/**
 * Flag names (after dropping "--f" and replacing "-" with "_") that switch on
 * a single member of the compound `autolinks` option, mapped to that member.
 */
const AUTOLINK_KIND_BY_FLAG = new Map<string, "url" | "www" | "email">([
  ["permissive_url_autolinks", "url"],
  ["permissive_www_autolinks", "www"],
  ["permissive_email_autolinks", "email"],
]);

/**
 * Renders `md` (with a newline appended) to HTML via `Bun.markdown.html`.
 *
 * @param md Markdown source.
 * @param flags Spec flags such as "--ftables". Each becomes a `true` option
 *   named after the flag with "--f" removed and "-" replaced by "_", except
 *   the permissive-autolink flags, which are folded into `autolinks`.
 * @returns The HTML string returned by the renderer.
 */
function renderMarkdown(md: string, flags?: string[]): string {
  // Option names are derived from flag text at runtime, so the bag is
  // deliberately loosely typed.
  const options: Record<string, any> = {};

  for (const flag of flags ?? []) {
    // Strip --f prefix, replace - with _
    const name = flag.slice(3).replace(/-/g, "_");
    const autolinkKind = AUTOLINK_KIND_BY_FLAG.get(name);

    // Map autolink flags to compound option
    if (name === "permissive_autolinks") {
      options.autolinks = true;
    } else if (autolinkKind !== undefined) {
      // A previous `true` (or nothing) is replaced by an object before the
      // individual kind is switched on.
      if (typeof options.autolinks !== "object") options.autolinks = {};
      options.autolinks[autolinkKind] = true;
    } else {
      options[name] = true;
    }
  }

  return markdown.html(md + "\n", options);
}

// Normalize HTML for comparison, ported from md4c's normalize.py.
// This ignores insignificant output differences:
// - Whitespace around block-level tags is removed
// - Multiple whitespace chars collapsed to single space (outside <pre>)
// - Self-closing tags converted to open tags (<br /> → <br>)
/** Tag names whose surrounding whitespace is dropped by `normalizeHtml`. */
const BLOCK_TAGS: ReadonlySet<string> = new Set([
  "article",
  "header",
  "aside",
  "hgroup",
  "blockquote",
  "hr",
  "iframe",
  "body",
  "li",
  "map",
  "button",
  "object",
  "canvas",
  "ol",
  "caption",
  "output",
  "col",
  "p",
  "colgroup",
  "pre",
  "dd",
  "progress",
  "div",
  "section",
  "dl",
  "table",
  "td",
  "dt",
  "tbody",
  "embed",
  "textarea",
  "fieldset",
  "tfoot",
  "figcaption",
  "th",
  "figure",
  "thead",
  "footer",
  "tr",
  "form",
  "ul",
  "h1",
  "h2",
  "h3",
  "h4",
  "h5",
  "h6",
  "video",
  "script",
  "style",
]);

/**
 * Rewrites an HTML fragment into a canonical form so that two renderings
 * differing only in insignificant details compare equal.
 *
 * - Whitespace next to block-level tags is removed.
 * - Runs of whitespace in text collapse to one space, except inside `pre`.
 * - Tag and attribute names are lower-cased, attributes are sorted by name
 *   and their values are re-quoted with double quotes.
 * - Self-closing tags are emitted as plain open tags.
 * - Comments, declarations (including CDATA) and processing instructions are
 *   copied through unchanged.
 *
 * @param html HTML fragment to normalize.
 * @returns The normalized fragment, trimmed at both ends.
 */
function normalizeHtml(html: string): string {
  let output = "";
  let lastType: "starttag" | "endtag" | "data" | "comment" | "decl" | "pi" = "starttag";
  let lastTag = "";
  let inPre = false;

  // Simple HTML tokenizer: splits into comments, CDATA sections, declarations,
  // processing instructions, tags and text. The `s` flag lets comments and
  // CDATA sections span lines.
  const tokens =
    html.match(/<!--.*?-->|<!\[CDATA\[.*?\]\]>|<![^>]*>|<\?[^>]*>|<\/?[a-zA-Z][^>]*\/?>|[^<]+/gs) || [];

  for (const token of tokens) {
    if (token.startsWith("<!--")) {
      // Comment: passed through untouched
      output += token;
      lastType = "comment";
    } else if (token.startsWith("<!")) {
      // Declaration or CDATA section: passed through untouched
      output += token;
      lastType = "decl";
    } else if (token.startsWith("<?")) {
      // Processing instruction: passed through untouched
      output += token;
      lastType = "pi";
    } else if (token.startsWith("</")) {
      // End tag
      const tag = token.slice(2, -1).trim().toLowerCase();

      // NOTE(review): mirrors normalize.py, where closing `pre` only leaves
      // pre-mode and every other block tag trims the text before it.
      if (tag === "pre") inPre = false;
      else if (BLOCK_TAGS.has(tag)) output = output.trimEnd();

      output += `</${tag}>`;
      lastTag = tag;
      lastType = "endtag";
    } else if (token.startsWith("<")) {
      // Start tag (possibly self-closing)
      const selfClosing = token.endsWith("/>");
      const inner = token.slice(1, selfClosing ? -2 : -1).trim();
      const spaceIdx = inner.search(/[\s\/]/);
      const tag = (spaceIdx === -1 ? inner : inner.slice(0, spaceIdx)).toLowerCase();

      if (tag === "pre") inPre = true;
      if (BLOCK_TAGS.has(tag)) output = output.trimEnd();

      // Parse attributes: name, optionally followed by a double-quoted,
      // single-quoted or bare value.
      const attrStr = spaceIdx === -1 ? "" : inner.slice(spaceIdx).replace(/\/$/, "").trim();
      const attrRe = /([a-zA-Z_:][a-zA-Z0-9_.:-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
      const attrs: [string, string | null][] = [];
      for (const m of attrStr.matchAll(attrRe)) {
        attrs.push([m[1].toLowerCase(), m[2] ?? m[3] ?? m[4] ?? null]);
      }
      attrs.sort((a, b) => a[0].localeCompare(b[0]));

      output += `<${tag}`;
      for (const [k, v] of attrs) {
        output += ` ${k}`;
        if (v !== null) output += `="${v}"`;
      }
      output += ">";

      lastTag = tag;
      // Self-closing tags are treated as endtags for whitespace purposes
      lastType = selfClosing ? "endtag" : "starttag";
    } else {
      // Text data
      let data = token;
      const afterTag = lastType === "endtag" || lastType === "starttag";
      const afterBlockTag = afterTag && BLOCK_TAGS.has(lastTag);

      if (afterTag && lastTag === "br") data = data.replace(/^\n/, "");
      if (!inPre) data = data.replace(/\s+/g, " ");
      if (afterBlockTag && !inPre) {
        // After an opening block tag only leading whitespace is insignificant;
        // after a closing (or self-closing) one, both ends are.
        data = lastType === "starttag" ? data.trimStart() : data.trim();
      }

      output += data;
      lastType = "data";
    }
  }

  return output.trim();
}

// Spec files to run, looked up next to this test file. `name` becomes the
// describe() title for the file's examples.
const specFiles = [
  { name: "CommonMark", file: "spec.txt" },
  { name: "GFM Tables", file: "spec-tables.txt" },
  { name: "GFM Strikethrough", file: "spec-strikethrough.txt" },
  { name: "GFM Tasklists", file: "spec-tasklists.txt" },
  { name: "Permissive Autolinks", file: "spec-permissive-autolinks.txt" },
  { name: "GFM", file: "spec-gfm.txt" },
  { name: "Coverage", file: "coverage.txt" },
  { name: "Regressions", file: "regressions.txt" },
];

/** Reports whether `err` carries the `ENOENT` (no such file) error code. */
function isMissingFileError(err: unknown): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
}

// Register one describe() per spec file and one test per example in it.
for (const { name, file } of specFiles) {
  const specPath = join(SPEC_DIR, file);
  let examples: SpecExample[];
  try {
    examples = parseSpecFile(specPath);
  } catch (err) {
    // A spec file that is not present is skipped; any other failure
    // (permissions, a directory in its place, ...) must not turn into a
    // silently missing suite, so it is rethrown.
    if (isMissingFileError(err)) continue;
    throw err;
  }
  if (examples.length === 0) continue;

  describe(name, () => {
    examples.forEach((ex, index) => {
      test(`example ${index + 1} (line ${ex.line}): ${ex.section}`, () => {
        const actual = renderMarkdown(ex.markdown, ex.flags.length > 0 ? ex.flags : undefined);
        // Both sides are normalized so only significant HTML differences fail.
        expect(normalizeHtml(actual)).toBe(normalizeHtml(ex.expected));
      });
    });
  });
}