// Bun test suite for HTMLRewriter: handler error propagation (sync and async),
// async replacements, element/comment mutations, CSS selector matching,
// attribute iteration, and regression tests (#2323, #3334, #3489, #3520).
// `setTimeoutAsync(fn, delay)` wraps setTimeout in a Promise that resolves with
// fn()'s return value, or rejects with whatever fn throws.
// NOTE(review): most HTML string literals below hold only text and line breaks
// (no tags) even though the selectors target elements such as div/p/span; the
// markup looks like it was lost when this file was flattened — confirm against
// the upstream copy before relying on these expectations.
import { afterAll, beforeAll, describe, expect, it } from "bun:test"; import fs from "fs"; import { gcTick, tls, tmpdirSync } from "harness"; import path, { join } from "path"; import { setImmediate as setImmediatePromise } from "timers/promises"; var setTimeoutAsync = (fn, delay) => { return new Promise((resolve, reject) => { setTimeout(() => { try { resolve(fn()); } catch (e) { reject(e); } }, delay); }); }; describe("HTMLRewriter", () => { it("error handling", () => { expect(() => new HTMLRewriter().transform(Symbol("ok"))).toThrow(); }); it("error inside element handler", () => { expect(() => new HTMLRewriter() .on("div", { element(element) { throw new Error("test"); }, }) .transform(new Response("
hello
")), ).toThrow("test"); }); it("error inside element handler (string)", () => { expect(() => new HTMLRewriter() .on("div", { element(element) { throw new Error("test"); }, }) .transform("
hello
"), ).toThrow("test"); }); it("fast async error inside element handler", () => { let caught = false; try { new HTMLRewriter() .on("div", { async element(element) { await setImmediatePromise(); throw new Error("test"); }, }) .transform(new Response("
hello
")); expect.unreachable(); } catch (e) { caught = true; expect(e.message).toBe("test"); } finally { expect(caught).toBeTrue(); } }); it("slow async error inside element handler", () => { let caught = false; try { new HTMLRewriter() .on("div", { async element(element) { await Bun.sleep(1); throw new Error("test"); }, }) .transform(new Response("
hello
")); expect.unreachable(); } catch (e) { caught = true; expect(e.message).toBe("test"); } finally { expect(caught).toBeTrue(); } }); it("HTMLRewriter: async replacement", async () => { await gcTick(); const res = new HTMLRewriter() .on("div", { async element(element) { await setTimeoutAsync(() => { element.setInnerContent("replace", { html: true }); }, 5); }, }) .transform(new Response("
example.com
")); await gcTick(); expect(await res.text()).toBe("
replace
"); await gcTick(); }); it("HTMLRewriter handles Symbol invalid type error", async () => { expect(() => new HTMLRewriter().transform(new Response(Symbol("ok")))).toThrow(); expect(() => new HTMLRewriter().transform(Symbol("ok"))).toThrow(); }); it("HTMLRewriter: async replacement using fetch + Bun.serve", async () => { await gcTick(); let content; { using server = Bun.serve({ port: 0, fetch(req) { return new HTMLRewriter() .on("div", { async element(element) { content = await fetch("https://www.example.com/").then(res => res.text()); element.setInnerContent(content, { html: true }); }, }) .transform(new Response("
example.com
")); }, }); await gcTick(); const url = `http://localhost:${server.port}`; expect(await fetch(url).then(res => res.text())).toBe(`
${content}
`); await gcTick(); } }); for (let input of [new Response("
hello
"), "
hello
"]) { it("supports element handlers with input " + input.constructor.name, async () => { var rewriter = new HTMLRewriter(); rewriter.on("div", { element(element) { element.setInnerContent("it worked!", { html: true }); }, }); var output = rewriter.transform(input); expect(typeof input === "string" ? output : await output.text()).toBe("
it worked!
"); }); } it("(from file) supports element handlers", async () => { var rewriter = new HTMLRewriter(); rewriter.on("div", { element(element) { element.setInnerContent("it worked!", { html: true }); }, }); const filePath = join(tmpdirSync(), "html-rewriter.txt.js"); await Bun.write(filePath, "
hello
"); var output = rewriter.transform(new Response(Bun.file(filePath))); expect(await output.text()).toBe("
it worked!
"); }); it("supports attribute iterator", async () => { var rewriter = new HTMLRewriter(); var expected = [ ["first", ""], ["second", "alrihgt"], ["third", "123"], ["fourth", "5"], ["fifth", "helloooo"], ]; rewriter.on("div", { element(element2) { for (let attr of element2.attributes) { const stack = expected.shift(); expect(stack[0]).toBe(attr[0]); expect(stack[1]).toBe(attr[1]); } }, }); var input = new Response('
hello
'); var output = rewriter.transform(input); expect(await output.text()).toBe('
hello
'); expect(expected.length).toBe(0); }); it("handles element specific mutations", async () => { // prepend/append let res = new HTMLRewriter() .on("p", { element(element) { element.prepend("prepend"); element.prepend("prepend html", { html: true }); element.append("append"); element.append("append html", { html: true }); }, }) .transform(new Response("

test

")); expect(await res.text()).toBe( [ "

", "prepend html", "<span>prepend</span>", "test", "<span>append</span>", "append html", "

", ].join(""), ); // setInnerContent res = new HTMLRewriter() .on("p", { element(element) { element.setInnerContent("replace"); }, }) .transform(new Response("

test

")); expect(await res.text()).toBe("

<span>replace</span>

"); res = new HTMLRewriter() .on("p", { element(element) { element.setInnerContent("replace", { html: true }); }, }) .transform(new Response("

test

")); expect(await res.text()).toBe("

replace

"); // removeAndKeepContent res = new HTMLRewriter() .on("p", { element(element) { element.removeAndKeepContent(); }, }) .transform(new Response("

test

")); expect(await res.text()).toBe("test"); }); it("handles element class properties", async () => { class Handler { constructor(content) { this.content = content; } // noinspection JSUnusedGlobalSymbols element(element) { element.setInnerContent(this.content); } } const res = new HTMLRewriter().on("p", new Handler("new")).transform(new Response("

test

")); expect(await res.text()).toBe("

new

"); }); const commentsMutationsInput = "

"; const commentsMutationsExpected = { beforeAfter: [ "

", "<span>before</span>", "before html", "", "after html", "<span>after</span>", "

", ].join(""), replace: "

<span>replace</span>

", replaceHtml: "

replace

", remove: "

", }; const commentPropertiesMacro = async func => { const res = func(new HTMLRewriter(), comment => { expect(comment.removed).toBe(false); expect(comment.text).toBe("test"); comment.text = "new"; expect(comment.text).toBe("new"); }).transform(new Response("

")); expect(await res.text()).toBe("

"); }; it("HTMLRewriter: handles comment properties", () => commentPropertiesMacro((rw, comments) => { rw.on("p", { comments }); return rw; })); it("selector tests", async () => { const checkSelector = async (selector, input, expected) => { const res = new HTMLRewriter() .on(selector, { element(element) { element.setInnerContent("new"); }, }) .transform(new Response(input)); expect(await res.text()).toBe(expected); }; await checkSelector("*", "

1

2

", "

new

new

"); await checkSelector("p", "

1

2

", "

1

new

"); await checkSelector( "p:nth-child(2)", "

1

2

3

", "

1

new

3

", ); await checkSelector( "p:first-child", "

1

2

3

", "

new

2

3

", ); await checkSelector( "p:nth-of-type(2)", "

1

2

3

4

5

", "

1

2

new

4

5

", ); await checkSelector( "p:first-of-type", "

1

2

3

", "

1

new

3

", ); await checkSelector( "p:not(:first-child)", "

1

2

3

", "

1

new

new

", ); await checkSelector("p.red", '

1

2

', '

new

2

'); await checkSelector("h1#header", '

1

2

', '

new

2

'); await checkSelector("p[data-test]", "

1

2

", "

new

2

"); await checkSelector( 'p[data-test="one"]', '

1

2

', '

new

2

', ); await checkSelector( 'p[data-test="one" i]', '

1

2

3

', '

new

new

3

', ); await checkSelector( 'p[data-test="one" s]', '

1

2

3

', '

new

2

3

', ); await checkSelector( 'p[data-test~="two"]', '

1

2

3

', '

new

new

3

', ); await checkSelector( 'p[data-test^="a"]', '

1

2

3

', '

new

new

3

', ); await checkSelector( 'p[data-test$="1"]', '

1

2

3

', '

new

2

new

', ); await checkSelector( 'p[data-test*="b"]', '

1

2

3

', '

new

new

3

', ); await checkSelector( 'p[data-test|="a"]', '

1

2

3

', '

new

new

3

', ); await checkSelector( "div span", "

1

23
", "

new

new3
", ); await checkSelector( "div > span", "

1

23
", "

1

new3
", ); }); it("supports deleting innerContent", async () => { expect( await new HTMLRewriter() .on("div", { element(elem) { // https://github.com/oven-sh/bun/issues/2323 elem.setInnerContent(""); }, }) .transform(new Response("
content
")) .text(), ).toEqual("
"); }); it("supports deleting innerHTML", async () => { expect( await new HTMLRewriter() .on("div", { element(elem) { // https://github.com/oven-sh/bun/issues/2323 elem.setInnerContent("", { html: true }); }, }) .transform(new Response("
content
")) .text(), ).toEqual("
"); }); it("it supports lastInTextNode", async () => { let lastInTextNode; await new HTMLRewriter() .on("p", { text(text) { lastInTextNode ??= text.lastInTextNode; }, }) .transform(new Response("

Lorem ipsum!

")) .text(); expect(lastInTextNode).toBeBoolean(); }); it("it supports selfClosing", async () => { const selfClosing = {}; await new HTMLRewriter() .on("*", { element(el) { selfClosing[el.tagName] = el.selfClosing; }, }) .transform(new Response("

Lorem ipsum!

")) .text(); expect(selfClosing).toEqual({ p: false, br: false, div: true, }); }); it("it supports canHaveContent", async () => { const canHaveContent = {}; await new HTMLRewriter() .on("*", { element(el) { canHaveContent[el.tagName] = el.canHaveContent; }, }) .transform(new Response("

Lorem ipsum!

")) .text(); expect(canHaveContent).toEqual({ p: true, br: false, div: true, svg: true, circle: false, }); }); }); // By not segfaulting, this test passes it("#3334 regression", async () => { for (let i = 0; i < 10; i++) { const headers = new Headers({ "content-type": "text/html", }); const response = new Response("
content
", { headers }); const result = await new HTMLRewriter() .on("div", { element(elem) { elem.setInnerContent("new"); }, }) .transform(response) .text(); expect(result).toEqual("
new
"); } Bun.gc(true); }); it("#3489", async () => { var el; await new HTMLRewriter() .on("p", { element(element) { el = element.getAttribute("id"); }, }) .transform(new Response('

')) .text(); expect(el).toEqual("Šžõäöü"); }); it("get attribute - ascii", async () => { for (let i = 0; i < 10; i++) { var el; await new HTMLRewriter() .on("p", { element(element) { el = element.getAttribute("id"); }, }) .transform(new Response(`

`)) .text(); expect(el).toEqual("asciii"); } }); it("#3520", async () => { const pairs = []; await new HTMLRewriter() .on("p", { element(element) { for (const pair of element.attributes) { pairs.push(pair); } }, }) .transform(new Response('

')) .text(); expect(pairs).toEqual([ ["šž", "Õäöü"], ["ab", "Õäöü"], ["šž", "Õäöü"], ["šž", "dc"], ["šž", "🕵🏻"], ]); }); const fixture_html = path.join(import.meta.dir, "../web/fetch/fixture.html"); const fixture_html_content = fs.readFileSync(fixture_html); const fixture_html_gz = path.join(import.meta.dir, "../web/fetch/fixture.html.gz"); const fixture_html_gz_content = fs.readFileSync(fixture_html_gz); function getStream(type, fixture) { const data = fixture === "gz" ? fixture_html_gz_content : fixture_html_content; const half = parseInt(data.length / 2, 10); if (type === "direct") { return new ReadableStream({ type: "direct", async pull(controller) { controller.write(data.slice(0, half)); await controller.flush(); controller.write(data.slice(half)); await controller.flush(); controller.close(); }, }); } return new ReadableStream({ async pull(controller) { controller.enqueue(data.slice(0, half)); await Bun.sleep(15); controller.enqueue(data.slice(half)); await Bun.sleep(15); controller.close(); }, }); } function createServer(tls) { return Bun.serve({ port: 0, tls, async fetch(req) { const is_compressed = req.url.endsWith("/gzip"); let payload; if (req.url.indexOf("chunked") !== -1) { if (req.url.indexOf("direct")) { payload = getStream("direct", is_compressed ? "gz" : "default"); } else { payload = getStream("default", is_compressed ? "gz" : "default"); } } else if (req.url.indexOf("file") !== -1) { payload = is_compressed ? Bun.file(fixture_html_gz) : Bun.file(fixture_html); } else { payload = is_compressed ? 
fixture_html_gz_content : fixture_html_content; } let headers = { "content-type": "text/html", }; if (is_compressed) { headers["content-encoding"] = "gzip"; } return new Response(payload, { headers }); }, }); } let http_server; let https_server; beforeAll(() => { http_server = createServer(); https_server = createServer({ ...tls, }); }); afterAll(() => { http_server?.stop(true); https_server?.stop(true); }); const request_types = ["/", "/gzip", "/chunked/gzip", "/chunked", "/file", "/file/gzip"]; ["http", "https"].forEach(protocol => { request_types.forEach(path => { it(`works with ${protocol} fetch using ${path}`, async () => { const server = protocol === "http" ? http_server : https_server; const server_origin = server.url.origin; const res = await fetch(`${server_origin}${path}`, { tls: { rejectUnauthorized: false } }); let calls = 0; const rw = new HTMLRewriter(); rw.on("h1", { text() { calls++; }, }); const transformed = rw.transform(res); if (transformed instanceof Error) throw transformed; const body = await transformed.text(); let trimmed = body?.trim(); expect(body).toBe(fixture_html_content.toString("utf8")); expect(trimmed).toEndWith(""); expect(trimmed).toStartWith(""); expect(calls).toBeGreaterThan(0); }); }); }); const payloads = [ { name: "direct", data: getStream("direct", "none"), test: it.todo, }, { name: "default", data: getStream("default", "none"), test: it.todo, }, { name: "file", data: Bun.file(fixture_html), test: it, }, { name: "blob", data: new Blob([fixture_html_content]), test: it, }, { name: "buffer", data: fixture_html_content, test: it, }, { name: "string", data: fixture_html_content.toString("utf8"), test: it, }, ]; payloads.forEach(type => { type.test(`works with payload of type ${type.name}`, async () => { let calls = 0; const rw = new HTMLRewriter(); rw.on("h1", { text() { calls++; }, }); const transformed = rw.transform(new Response(type.data)); if (transformed instanceof Error) throw transformed; const body = await 
transformed.text(); let trimmed = body?.trim(); expect(body).toBe(fixture_html_content.toString("utf8")); expect(trimmed).toEndWith(""); expect(trimmed).toStartWith(""); expect(calls).toBeGreaterThan(0); }); });