fix(bundler): process resources inside <noscript> tags in HTML bundler

The HTML bundler was ignoring resources (stylesheets, images, etc.) inside `<noscript>` tags because lol-html treats noscript content as raw text when the scripting flag is enabled (which is the default). This fix adds manual parsing of the noscript raw text content to: (1) extract href/src/srcset/poster attribute values, (2) create import records for the discovered resources, and (3) rewrite URLs in the output to point to the bundled files. CSS files inside noscript now point to the bundled CSS chunk, and other assets (images, etc.) get properly hashed URLs. Fixes #25618 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-16 13:51:47 +00:00 · 2026-01-12 21:05:43 +00:00
4 changed files with 532 additions and 1 deletions
--- a/src/HTMLScanner.zig
+++ b/src/HTMLScanner.zig
@@ -66,6 +66,11 @@ pub fn onTag(this: *HTMLScanner, _: *lol.Element, path: []const u8, url_attribut
    this.createImportRecord(path, kind) catch {};
 }

+/// Record an import for a URL discovered inside a `<noscript>` element.
+/// `path` is the attribute value sliced out of the raw noscript text and `kind`
+/// is the import kind chosen by the caller. Any error returned by
+/// `createImportRecord` is discarded, the same way `onTag` discards it.
+pub fn onNoscriptUrl(this: *HTMLScanner, path: []const u8, kind: ImportKind) void {
+    this.createImportRecord(path, kind) catch {};
+}
+
 const processor = HTMLProcessor(HTMLScanner, false);

 pub fn scan(this: *HTMLScanner, input: []const u8) !void {
@@ -198,6 +203,181 @@ pub fn HTMLProcessor(
            //     },
        };

+        /// Byte range of one URL inside a noscript text chunk, plus the import kind
+        /// it is recorded as. The URL text is `content[start..end]`; the same range
+        /// is what gets replaced when the chunk is rewritten.
+        pub const NoscriptUrlLocation = struct {
+            /// Index of the first byte of the URL within the scanned content.
+            start: usize,
+            /// Index one past the last byte of the URL (exclusive).
+            end: usize,
+            /// Import kind assigned by `findNoscriptUrls`.
+            kind: ImportKind,
+        };
+
+        /// Capacity of the fixed-size list returned by `findNoscriptUrls`. Once a
+        /// single text chunk has yielded this many URLs, further matches in that
+        /// chunk are not recorded.
+        const max_noscript_urls = 32;
+
+        /// A quoted attribute value located inside raw noscript text.
+        const NoscriptAttrMatch = struct {
+            /// Index of the first byte of the attribute name.
+            name_start: usize,
+            /// Index of the first byte after the opening quote.
+            value_start: usize,
+            /// Index of the closing quote (exclusive end of the value).
+            value_end: usize,
+        };
+
+        /// One image-candidate URL inside a `srcset` value.
+        const NoscriptSrcsetUrl = struct {
+            /// Index of the first byte of the candidate URL.
+            start: usize,
+            /// Index one past the last byte of the candidate URL.
+            end: usize,
+            /// Index at which scanning for the next candidate should resume.
+            next: usize,
+        };
+
+        /// Whether `c` can be part of an attribute name. Used to reject matches such
+        /// as `data-src=` or `xlink:href=` when searching for `src=` / `href=`.
+        fn isNoscriptAttrNameByte(c: u8) bool {
+            return std.ascii.isAlphanumeric(c) or c == '-' or c == '_' or c == ':' or c == '.';
+        }
+
+        /// Find the next non-empty `name="value"` or `name='value'` at or after `from`.
+        ///
+        /// Matching is case-sensitive and only recognizes quoted values written
+        /// without whitespace around `=`. A match is rejected when the byte before
+        /// the name is itself an attribute-name byte, so `src` does not match inside
+        /// `data-src`. A match at index 0 is accepted because, in streaming mode,
+        /// the preceding part of the tag may be in a previous chunk.
+        fn nextNoscriptAttr(content: []const u8, from: usize, comptime name: []const u8) ?NoscriptAttrMatch {
+            const needle = name ++ "=";
+            var search_from = from;
+            while (search_from < content.len) {
+                const name_start = std.mem.indexOfPos(u8, content, search_from, needle) orelse return null;
+                search_from = name_start + 1;
+
+                // Part of a longer attribute name (e.g. `data-src=`): not ours.
+                if (name_start > 0 and isNoscriptAttrNameByte(content[name_start - 1])) continue;
+
+                const quote_pos = name_start + needle.len;
+                // Nothing after `=`; any later match would run off the end too.
+                if (quote_pos >= content.len) return null;
+                const quote_char = content[quote_pos];
+                if (quote_char != '"' and quote_char != '\'') continue;
+
+                const value_start = quote_pos + 1;
+                const value_end = std.mem.indexOfScalarPos(u8, content, value_start, quote_char) orelse continue;
+                // An empty value names no resource; do not create an import for it.
+                if (value_end == value_start) continue;
+
+                return .{ .name_start = name_start, .value_start = value_start, .value_end = value_end };
+            }
+            return null;
+        }
+
+        /// Find the next candidate URL of a `srcset` value in `content[from..limit]`.
+        ///
+        /// A `srcset` value is a comma-separated list of `url [descriptor]` entries.
+        /// The URL is the first run of non-whitespace bytes of an entry, with any
+        /// trailing commas removed; everything up to the next comma is a descriptor.
+        fn nextNoscriptSrcsetUrl(content: []const u8, from: usize, limit: usize) ?NoscriptSrcsetUrl {
+            var start = from;
+            while (start < limit and (std.ascii.isWhitespace(content[start]) or content[start] == ',')) start += 1;
+            if (start >= limit) return null;
+
+            var token_end = start;
+            while (token_end < limit and !std.ascii.isWhitespace(content[token_end])) token_end += 1;
+
+            // `a.png,` — the comma terminates the entry and is not part of the URL.
+            var end = token_end;
+            while (end > start and content[end - 1] == ',') end -= 1;
+
+            const next = if (end < token_end)
+                token_end
+            else
+                std.mem.indexOfScalarPos(u8, content[0..limit], token_end, ',') orelse limit;
+
+            return .{ .start = start, .end = end, .next = next };
+        }
+
+        /// Parse noscript raw text content and return the location of every
+        /// resource URL found, sorted by start offset.
+        ///
+        /// lol-html treats noscript content as raw text (scripting flag enabled), so
+        /// the markup is scanned by hand for these quoted attributes:
+        ///   - `href` on `<link>` tags: `.at` when the tag has `rel="stylesheet"` /
+        ///     `rel='stylesheet'`, otherwise `.url`. `href` on other tags is ignored.
+        ///   - `src`: `.stmt` on `<script>` tags, `.url` on anything else.
+        ///   - `srcset`: one `.url` entry per image candidate.
+        ///   - `poster`: `.url`.
+        ///
+        /// At most `max_noscript_urls` locations are returned; additional matches
+        /// are dropped.
+        fn findNoscriptUrls(content: []const u8) bun.BoundedArray(NoscriptUrlLocation, max_noscript_urls) {
+            var urls: bun.BoundedArray(NoscriptUrlLocation, max_noscript_urls) = .{};
+
+            // href attributes (only on <link>)
+            var offset: usize = 0;
+            while (nextNoscriptAttr(content, offset, "href")) |attr| {
+                offset = attr.value_end + 1;
+
+                // In streaming mode the '<' might be in a previous chunk, so a chunk
+                // that starts with "link" (no '<') is accepted as well.
+                const tag_start = std.mem.lastIndexOfScalar(u8, content[0..attr.name_start], '<') orelse 0;
+                const tag_end = std.mem.indexOfScalarPos(u8, content, attr.name_start, '>') orelse content.len;
+                const tag_content = content[tag_start..tag_end];
+                const is_link = std.mem.indexOf(u8, tag_content, "<link") != null or
+                    (tag_start == 0 and std.mem.startsWith(u8, tag_content, "link"));
+                if (!is_link) continue;
+
+                const is_stylesheet = std.mem.indexOf(u8, tag_content, "rel=\"stylesheet\"") != null or
+                    std.mem.indexOf(u8, tag_content, "rel='stylesheet'") != null;
+                const kind: ImportKind = if (is_stylesheet) .at else .url;
+                urls.append(.{ .start = attr.value_start, .end = attr.value_end, .kind = kind }) catch break;
+            }
+
+            // src attributes (<script>, <img>, <video>, <audio>, <source>, ...)
+            offset = 0;
+            while (nextNoscriptAttr(content, offset, "src")) |attr| {
+                offset = attr.value_end + 1;
+
+                // Same streaming caveat as above: the '<' may be in a previous chunk.
+                const tag_start = std.mem.lastIndexOfScalar(u8, content[0..attr.name_start], '<') orelse 0;
+                const tag_content = content[tag_start..attr.name_start];
+                const is_script = std.mem.indexOf(u8, tag_content, "<script") != null or
+                    (tag_start == 0 and std.mem.startsWith(u8, tag_content, "script"));
+                const kind: ImportKind = if (is_script) .stmt else .url;
+                urls.append(.{ .start = attr.value_start, .end = attr.value_end, .kind = kind }) catch break;
+            }
+
+            // srcset attributes (<img>, <source>): one entry per candidate URL
+            offset = 0;
+            srcset: while (nextNoscriptAttr(content, offset, "srcset")) |attr| {
+                offset = attr.value_end + 1;
+
+                var cursor = attr.value_start;
+                while (nextNoscriptSrcsetUrl(content, cursor, attr.value_end)) |candidate| {
+                    cursor = candidate.next;
+                    urls.append(.{ .start = candidate.start, .end = candidate.end, .kind = .url }) catch break :srcset;
+                }
+            }
+
+            // poster attributes (<video>)
+            offset = 0;
+            while (nextNoscriptAttr(content, offset, "poster")) |attr| {
+                offset = attr.value_end + 1;
+                urls.append(.{ .start = attr.value_start, .end = attr.value_end, .kind = .url }) catch break;
+            }
+
+            // The passes above run per attribute name; callers need document order.
+            std.mem.sort(NoscriptUrlLocation, urls.slice(), {}, struct {
+                pub fn lessThan(_: void, a: NoscriptUrlLocation, b: NoscriptUrlLocation) bool {
+                    return a.start < b.start;
+                }
+            }.lessThan);
+
+            return urls;
+        }
+
+        /// Text-chunk handler registered for `<noscript>` elements.
+        ///
+        /// Scans the chunk for resource URLs, reports each one through
+        /// `T.onNoscriptUrl`, and, when `T` declares `rewriteNoscriptContent`,
+        /// hands it the chunk together with the URL locations. Always returns
+        /// `false`, whatever the rewrite callback reports.
+        fn handleNoscriptText(this: *T, text_chunk: *lol.TextChunk) bool {
+            const raw = text_chunk.getContent();
+            if (raw.len == 0) return false;
+            const content = raw.slice();
+
+            var found = findNoscriptUrls(content);
+            if (found.len == 0) return false;
+            const locations = found.slice();
+
+            // Report every URL, in document order.
+            for (locations) |location| {
+                const url_value = content[location.start..location.end];
+                debug("noscript url: {s} kind={}", .{ url_value, location.kind });
+                T.onNoscriptUrl(this, url_value, location.kind);
+            }
+
+            // Optional rewrite hook; its result does not affect our return value.
+            if (@hasDecl(T, "rewriteNoscriptContent")) {
+                _ = T.rewriteNoscriptContent(this, content, locations, text_chunk);
+            }
+
+            return false;
+        }
+
        fn generateHandlerForTag(comptime tag_info: TagHandler) fn (*T, *lol.Element) bool {
            const Handler = struct {
                pub fn handle(this: *T, element: *lol.Element) bool {
@@ -222,7 +402,8 @@ pub fn HTMLProcessor(
            var builder = lol.HTMLRewriter.Builder.init();
            defer builder.deinit();

-            var selectors: bun.BoundedArray(*lol.HTMLSelector, tag_handlers.len + if (visit_document_tags) 3 else 0) = .{};
+            // +1 for noscript handler
+            var selectors: bun.BoundedArray(*lol.HTMLSelector, tag_handlers.len + 1 + if (visit_document_tags) 3 else 0) = .{};
            defer for (selectors.slice()) |selector| {
                selector.deinit();
            };
@@ -245,6 +426,26 @@ pub fn HTMLProcessor(
                );
            }

+            // Add noscript handler with text content handler to parse raw content
+            // The HTML parser treats noscript content as raw text (scripting flag enabled),
+            // so we use a text handler to capture and parse the content manually.
+            if (@hasDecl(T, "onNoscriptUrl")) {
+                const noscript_selector = try lol.HTMLSelector.parse("noscript");
+                selectors.appendAssumeCapacity(noscript_selector);
+                try builder.addElementContentHandlers(
+                    noscript_selector,
+                    void, // No element handler needed
+                    null,
+                    null,
+                    void,
+                    null,
+                    null,
+                    T,
+                    handleNoscriptText,
+                    this,
+                );
+            }
+
            if (visit_document_tags) {
                inline for (.{ "body", "head", "html" }, &.{ T.onBodyTag, T.onHeadTag, T.onHtmlTag }) |tag, cb| {
                    const head_selector = try lol.HTMLSelector.parse(tag);
--- a/src/bundler/linker_context/generateCompileResultForHtmlChunk.zig
+++ b/src/bundler/linker_context/generateCompileResultForHtmlChunk.zig
@@ -113,6 +113,98 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
            }
        }

+        /// Callback invoked for every URL found in noscript raw text.
+        /// Intentionally empty in this type: `rewriteNoscriptContent` performs the
+        /// replacement for the whole chunk, so all parameters are ignored. The
+        /// declaration still has to exist, because the HTML processor only installs
+        /// its noscript text handler when the type declares `onNoscriptUrl`.
+        pub fn onNoscriptUrl(_: *@This(), _: []const u8, _: ImportKind) void {
+            // The actual URL replacement is handled by rewriteNoscriptContent
+        }
+
+        /// Rebuild a noscript text chunk with each located URL swapped for its
+        /// bundled counterpart, then replace the chunk with the result.
+        ///
+        /// `url_locations` must be ordered by position in `content`. Exactly one
+        /// import record is consumed per location, in order, advancing
+        /// `current_import_record_index`; running out of records panics.
+        ///
+        /// Returns `true` when the chunk was replaced, and `false` when there was
+        /// nothing to rewrite or when an allocation or the replace call failed.
+        ///
+        /// NOTE(review): a CSS URL is pointed at the CSS chunk returned by
+        /// `getCSSChunkForHTML`. If that chunk is also linked outside `<noscript>`,
+        /// noscript-only rules would apply with scripting enabled too — confirm
+        /// this is intended.
+        pub fn rewriteNoscriptContent(
+            this: *@This(),
+            content: []const u8,
+            url_locations: anytype,
+            text_chunk: *lol.TextChunk,
+        ) bool {
+            if (url_locations.len == 0) return false;
+
+            var rewritten = std.array_list.Managed(u8).init(this.allocator);
+            defer rewritten.deinit();
+
+            var cursor: usize = 0;
+            for (url_locations) |url_loc| {
+                if (this.current_import_record_index >= this.import_records.len) {
+                    Output.panic("Assertion failure in HTMLLoader.rewriteNoscriptContent: current_import_record_index ({d}) >= import_records.len ({d})", .{ this.current_import_record_index, this.import_records.len });
+                }
+
+                const import_record: *const ImportRecord = &this.import_records[this.current_import_record_index];
+                this.current_import_record_index += 1;
+
+                // Per-file metadata is only available for records that resolved to a source.
+                const has_source = import_record.source_index.isValid();
+                const unique_key_for_additional_files = if (has_source)
+                    this.linker.parse_graph.input_files.items(.unique_key_for_additional_file)[import_record.source_index.get()]
+                else
+                    "";
+                const loader: Loader = if (has_source)
+                    this.linker.parse_graph.input_files.items(.loader)[import_record.source_index.get()]
+                else
+                    .file;
+
+                const original_url = content[url_loc.start..url_loc.end];
+
+                // Pick the text that takes the place of the original URL.
+                const replacement: []const u8 = pick: {
+                    // External imports are left untouched.
+                    if (import_record.flags.is_external_without_side_effects) break :pick original_url;
+
+                    if (this.linker.dev_server != null) {
+                        if (unique_key_for_additional_files.len > 0) break :pick unique_key_for_additional_files;
+                        // CSS/JS (and disabled paths) keep their original path under the dev server.
+                        const keep_original = import_record.path.is_disabled or loader.isJavaScriptLike() or loader.isCSS();
+                        break :pick if (keep_original) original_url else import_record.path.pretty;
+                    }
+
+                    // Nothing was resolved for this record.
+                    if (import_record.source_index.isInvalid()) break :pick original_url;
+
+                    if (loader.isCSS()) {
+                        // Point at the CSS chunk of this HTML chunk, when there is one.
+                        const css_chunk = this.chunk.getCSSChunkForHTML(this.chunks) orelse break :pick original_url;
+                        break :pick css_chunk.unique_key;
+                    }
+
+                    // JS inside noscript is unusual; left as-is for now.
+                    if (loader.isJavaScriptLike()) break :pick original_url;
+
+                    // Other assets (images, etc.) use their unique key when they have one.
+                    break :pick if (unique_key_for_additional_files.len > 0)
+                        unique_key_for_additional_files
+                    else
+                        original_url;
+                };
+
+                // Copy the untouched text before the URL, then the chosen replacement.
+                rewritten.appendSlice(content[cursor..url_loc.start]) catch return false;
+                rewritten.appendSlice(replacement) catch return false;
+                cursor = url_loc.end;
+            }
+
+            // Tail after the last URL.
+            rewritten.appendSlice(content[cursor..]) catch return false;
+
+            // `true` = treat the replacement as HTML so it is not escaped.
+            text_chunk.replace(rewritten.items, true) catch return false;
+
+            return true;
+        }
+
        pub fn onHeadTag(this: *@This(), element: *lol.Element) bool {
            element.onEndTag(endHeadTagHandler, this) catch return true;
            return false;
--- a/test/bundler/bundler_html.test.ts
+++ b/test/bundler/bundler_html.test.ts
@@ -843,4 +843,107 @@ body {
      api.expectFile("out/" + jsFile).toContain("sourceMappingURL");
    },
  });
+
+  // Test that resources inside <noscript> tags are bundled - issue #25618
+  // Fixture: a stylesheet <link> inside <noscript> in <head>, a regular stylesheet
+  // <link>, and an <img> inside <noscript> in <body>.
+  itBundled("html/noscript-resources", {
+    outdir: "out/",
+    files: {
+      "/index.html": `
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <noscript><link rel="stylesheet" href="noscript.css"></noscript>
+    <link rel="stylesheet" href="main.css">
+  </head>
+  <body>
+    <p>Hello, World!</p>
+    <noscript>
+      <img src="fallback.jpg" alt="Fallback image">
+    </noscript>
+  </body>
+</html>`,
+      "/noscript.css": `
+p {
+  color: red;
+}`,
+      "/main.css": `
+body {
+  margin: 0;
+}`,
+      "/fallback.jpg": "fake image content",
+    },
+    entryPoints: ["/index.html"],
+    onAfterBundle(api) {
+      const htmlContent = api.readFile("out/index.html");
+
+      // Neither original stylesheet path may survive; some .css href must be present
+      expect(htmlContent).not.toContain('href="noscript.css"');
+      expect(htmlContent).not.toContain('href="main.css"');
+      expect(htmlContent).toMatch(/href=".*\.css"/);
+
+      // The <img> inside <noscript> must point at a rewritten .jpg path
+      expect(htmlContent).not.toContain('src="fallback.jpg"');
+      expect(htmlContent).toMatch(/<noscript>\s*<img src=".*\.jpg"/);
+
+      // At least one .css href must be present (this does not require two distinct files)
+      const cssMatches = htmlContent.match(/href="([^"]*\.css)"/g);
+      expect(cssMatches).not.toBeNull();
+      expect(cssMatches!.length).toBeGreaterThanOrEqual(1);
+
+      // The first referenced CSS file must contain declarations from both
+      // noscript.css (color) and main.css (margin)
+      const cssPath = htmlContent.match(/href="([^"]*\.css)"/)?.[1];
+      const cssBundle = api.readFile("out/" + cssPath!);
+      expect(cssBundle).toContain("color:");
+      expect(cssBundle).toContain("margin:");
+    },
+  });
+
+  // Test a page that has a regular <script> plus a stylesheet <link> inside
+  // <noscript> - issue #25618
+  itBundled("html/noscript-script", {
+    outdir: "out/",
+    files: {
+      "/index.html": `
+<!DOCTYPE html>
+<html>
+  <head>
+    <script src="main.js"></script>
+    <noscript>
+      <link rel="stylesheet" href="nojs-styles.css">
+    </noscript>
+  </head>
+  <body>
+    <div id="app"></div>
+  </body>
+</html>`,
+      "/main.js": `console.log('App loaded');`,
+      "/nojs-styles.css": `
+#app {
+  display: none;
+}
+.no-js-message {
+  display: block;
+}`,
+    },
+    entryPoints: ["/index.html"],
+    onAfterBundle(api) {
+      const htmlContent = api.readFile("out/index.html");
+
+      // Main JS should be bundled
+      expect(htmlContent).not.toContain('src="main.js"');
+      expect(htmlContent).toMatch(/src=".*\.js"/);
+
+      // Noscript CSS should be bundled
+      expect(htmlContent).not.toContain('href="nojs-styles.css"');
+
+      // The <link> directly inside <noscript> must reference a .css file
+      const noscriptMatch = htmlContent.match(/<noscript>\s*<link[^>]*href="([^"]*\.css)"/);
+      expect(noscriptMatch).not.toBeNull();
+
+      // ...and that file must contain both selectors from nojs-styles.css
+      const cssBundle = api.readFile("out/" + noscriptMatch![1]);
+      expect(cssBundle).toContain("#app");
+      expect(cssBundle).toContain(".no-js-message");
+    },
+  });
 });
--- a/test/regression/issue/25618.test.ts
+++ b/test/regression/issue/25618.test.ts
@@ -0,0 +1,135 @@
+// https://github.com/oven-sh/bun/issues/25618
+// HTML bundler ignores resources inside <noscript> tags
+
+import { expect, test } from "bun:test";
+import { existsSync, readdirSync, readFileSync } from "fs";
+import { bunEnv, bunExe, tempDir } from "harness";
+import { join } from "path";
+
+// Runs `bun build` on an HTML file whose only stylesheet is linked from inside
+// <noscript>, then inspects the output directory on disk.
+test("bun build bundles CSS inside noscript tags - issue #25618", async () => {
+  using dir = tempDir("25618-noscript-css", {
+    "index.html": `<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <noscript><link rel="stylesheet" href="noscript.css"></noscript>
+  </head>
+  <body>
+    <p>Hello, World!</p>
+  </body>
+</html>`,
+    "noscript.css": `p {
+  color: red;
+}`,
+  });
+
+  await using proc = Bun.spawn({
+    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
+    env: bunEnv,
+    cwd: String(dir),
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+
+  // stdout and stderr are read to completion here but not asserted on below
+  const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
+
+  expect(exitCode).toBe(0);
+
+  // Check that output directory exists
+  const outputDir = join(String(dir), "output");
+  expect(existsSync(outputDir)).toBe(true);
+
+  // Check that at least one .css file was written to the output directory
+  const files = readdirSync(outputDir);
+  const cssFiles = files.filter((f: string) => f.endsWith(".css"));
+  expect(cssFiles.length).toBeGreaterThan(0);
+
+  // Check that the HTML no longer references the original path and has some .css href
+  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
+  expect(htmlContent).not.toContain('href="noscript.css"');
+  expect(htmlContent).toMatch(/href="[^"]*\.css"/);
+
+  // Check that the first emitted CSS file contains the expected declaration
+  const cssContent = readFileSync(join(outputDir, cssFiles[0]), "utf-8");
+  expect(cssContent).toContain("color:");
+});
+
+// Runs `bun build` on an HTML file whose only image is referenced from inside
+// <noscript>, then inspects the output directory on disk.
+test("bun build bundles images inside noscript tags - issue #25618", async () => {
+  using dir = tempDir("25618-noscript-img", {
+    "index.html": `<!DOCTYPE html>
+<html>
+  <body>
+    <noscript>
+      <img src="fallback.png" alt="Fallback">
+    </noscript>
+  </body>
+</html>`,
+    "fallback.png": "fake png content",
+  });
+
+  await using proc = Bun.spawn({
+    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
+    env: bunEnv,
+    cwd: String(dir),
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+
+  // stdout and stderr are read to completion here but not asserted on below
+  const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
+
+  expect(exitCode).toBe(0);
+
+  // Check that output directory exists
+  const outputDir = join(String(dir), "output");
+  expect(existsSync(outputDir)).toBe(true);
+
+  // Check that at least one .png file was written to the output directory
+  const files = readdirSync(outputDir);
+  const imgFiles = files.filter((f: string) => f.endsWith(".png"));
+  expect(imgFiles.length).toBeGreaterThan(0);
+
+  // Check that the HTML no longer references the original path and has some .png src
+  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
+  expect(htmlContent).not.toContain('src="fallback.png"');
+  expect(htmlContent).toMatch(/src="[^"]*\.png"/);
+});
+
+// Two <noscript> blocks: one in <head> holding only a meta refresh (no
+// href/src/srcset/poster attribute, so nothing in it should be rewritten) and
+// one in <body> holding a stylesheet <link> that must be bundled.
+test("bun build bundles CSS in noscript and keeps a noscript meta refresh intact - issue #25618", async () => {
+  using dir = tempDir("25618-noscript-script", {
+    "index.html": `<!DOCTYPE html>
+<html>
+  <head>
+    <noscript>
+      <meta http-equiv="refresh" content="0; url=nojs.html">
+    </noscript>
+  </head>
+  <body>
+    <noscript>
+      <link rel="stylesheet" href="nojs.css">
+    </noscript>
+  </body>
+</html>`,
+    "nojs.css": `body { background: yellow; }`,
+  });
+
+  await using proc = Bun.spawn({
+    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
+    env: bunEnv,
+    cwd: String(dir),
+    stdout: "pipe",
+    stderr: "pipe",
+  });
+
+  // stdout and stderr are read to completion here but not asserted on below
+  const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
+
+  expect(exitCode).toBe(0);
+
+  // A CSS file must have been emitted for the noscript stylesheet
+  const outputDir = join(String(dir), "output");
+  const files = readdirSync(outputDir);
+  const cssFiles = files.filter((f: string) => f.endsWith(".css"));
+  expect(cssFiles.length).toBeGreaterThan(0);
+
+  // The original path must be gone and replaced by some .css href
+  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
+  expect(htmlContent).not.toContain('href="nojs.css"');
+  expect(htmlContent).toMatch(/href="[^"]*\.css"/);
+
+  // The meta refresh inside the head noscript must pass through unchanged
+  expect(htmlContent).toContain('content="0; url=nojs.html"');
+});