Compare commits

...

1 Commits

Author SHA1 Message Date
Claude Bot
f4d71a687a fix(bundler): process resources inside <noscript> tags in HTML bundler
The HTML bundler was ignoring resources (stylesheets, images, etc.)
inside `<noscript>` tags because lol-html treats noscript content
as raw text when the scripting flag is enabled (which is the default).

This fix adds manual parsing of noscript raw text content to:
- Extract href/src/srcset/poster attribute values
- Create import records for discovered resources
- Rewrite URLs in the output to point to bundled files

CSS files inside noscript are now pointed to the bundled CSS chunk,
and other assets (images, etc.) get their URLs properly hashed.

Fixes #25618

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 21:05:43 +00:00
4 changed files with 532 additions and 1 deletions

View File

@@ -66,6 +66,11 @@ pub fn onTag(this: *HTMLScanner, _: *lol.Element, path: []const u8, url_attribut
this.createImportRecord(path, kind) catch {};
}
/// Handle URLs found inside noscript elements (parsed from raw text content).
/// Forwards `path` and `kind` to `createImportRecord`.
/// Any error from `createImportRecord` is discarded, the same way `onTag`
/// treats that call, so a failure here is silent.
pub fn onNoscriptUrl(this: *HTMLScanner, path: []const u8, kind: ImportKind) void {
this.createImportRecord(path, kind) catch {};
}
const processor = HTMLProcessor(HTMLScanner, false);
pub fn scan(this: *HTMLScanner, input: []const u8) !void {
@@ -198,6 +203,181 @@ pub fn HTMLProcessor(
// },
};
/// URL location within noscript content for replacement.
/// `start` and `end` are byte offsets into the text chunk content; the URL
/// text is the half-open range `content[start..end]`.
pub const NoscriptUrlLocation = struct {
/// Offset of the first byte of the attribute value (just after the opening quote).
start: usize,
/// Offset of the closing quote, i.e. one past the last byte of the value.
end: usize,
/// Import kind chosen for this URL by `findNoscriptUrls`.
kind: ImportKind,
};
/// Maximum number of URLs we expect to find in a single noscript element.
/// This is the capacity of the bounded array returned by `findNoscriptUrls`;
/// once it is full, further appends fail and the remaining URLs are dropped.
const max_noscript_urls = 32;
/// Attributes whose quoted values `findNoscriptUrls` treats as URLs.
const NoscriptUrlAttribute = enum {
    href,
    src,
    srcset,
    poster,

    /// Text searched for in the raw content: the attribute name followed by `=`.
    fn needle(self: NoscriptUrlAttribute) []const u8 {
        return switch (self) {
            .href => "href=",
            .src => "src=",
            .srcset => "srcset=",
            .poster => "poster=",
        };
    }
};

/// A quoted attribute value located by `nextQuotedNoscriptAttribute`.
const QuotedNoscriptAttribute = struct {
    /// Offset of the first byte of the attribute name.
    name_start: usize,
    /// Offset of the first byte after the opening quote.
    value_start: usize,
    /// Offset of the closing quote.
    value_end: usize,
};

/// Returns true when an attribute name can begin at `pos`.
/// The start of the content counts as a boundary because a streamed chunk may
/// begin in the middle of a tag. Otherwise the previous byte must be
/// whitespace, `/`, or a quote. This keeps `data-src="..."` or
/// `data-href="..."` from being mistaken for `src` / `href`.
fn isNoscriptAttributeBoundary(content: []const u8, pos: usize) bool {
    if (pos == 0) return true;
    return switch (content[pos - 1]) {
        ' ', '\t', '\n', '\r', 0x0C, '/', '"', '\'' => true,
        else => false,
    };
}

/// Finds the next occurrence of `needle` (an attribute name plus `=`) at or
/// after `from` that starts on an attribute boundary and is immediately
/// followed by a single- or double-quoted value with a matching closing quote.
/// Returns null when no such occurrence exists.
fn nextQuotedNoscriptAttribute(content: []const u8, from: usize, needle: []const u8) ?QuotedNoscriptAttribute {
    var offset = from;
    while (std.mem.indexOfPos(u8, content, offset, needle)) |name_start| {
        // Whatever happens below, the next search resumes just past this hit.
        offset = name_start + 1;
        if (!isNoscriptAttributeBoundary(content, name_start)) continue;

        const quote_pos = name_start + needle.len;
        if (quote_pos >= content.len) continue;
        const quote_char = content[quote_pos];
        // Unquoted values are not supported.
        if (quote_char != '"' and quote_char != '\'') continue;

        const value_start = quote_pos + 1;
        const value_end = std.mem.indexOfScalarPos(u8, content, value_start, quote_char) orelse continue;
        return .{ .name_start = name_start, .value_start = value_start, .value_end = value_end };
    }
    return null;
}

/// Decides the import kind for an attribute found at `name_start`, or returns
/// null when the attribute should be ignored.
///   - `href`: only inside a `link` tag; `.at` when the tag also contains
///     `rel="stylesheet"` / `rel='stylesheet'`, otherwise `.url`.
///   - `src`: `.stmt` inside a `script` tag, otherwise `.url`.
///   - `srcset`, `poster`: always `.url`.
/// The tag is taken to start at the last `<` before the attribute. When there
/// is none (the `<` may be in a previous streamed chunk), the content itself
/// must start with the tag name.
fn classifyNoscriptUrl(content: []const u8, attribute: NoscriptUrlAttribute, name_start: usize) ?ImportKind {
    switch (attribute) {
        .srcset, .poster => return .url,
        .href => {
            const tag_start = std.mem.lastIndexOfScalar(u8, content[0..name_start], '<') orelse 0;
            const tag_end = std.mem.indexOfScalarPos(u8, content, name_start, '>') orelse content.len;
            const tag_content = content[tag_start..tag_end];
            const is_link = std.mem.indexOf(u8, tag_content, "<link") != null or
                (tag_start == 0 and std.mem.startsWith(u8, tag_content, "link"));
            if (!is_link) return null;

            const is_stylesheet = std.mem.indexOf(u8, tag_content, "rel=\"stylesheet\"") != null or
                std.mem.indexOf(u8, tag_content, "rel='stylesheet'") != null;
            const kind: ImportKind = if (is_stylesheet) .at else .url;
            return kind;
        },
        .src => {
            const tag_start = std.mem.lastIndexOfScalar(u8, content[0..name_start], '<') orelse 0;
            const tag_content = content[tag_start..name_start];
            const is_script = std.mem.indexOf(u8, tag_content, "<script") != null or
                (tag_start == 0 and std.mem.startsWith(u8, tag_content, "script"));
            const kind: ImportKind = if (is_script) .stmt else .url; // img, video, audio, source, etc.
            return kind;
        },
    }
}

/// Parse noscript raw text content to find URL locations.
///
/// Since lol-html treats noscript content as raw text (scripting flag is
/// enabled), the content is scanned manually for quoted `href`, `src`,
/// `srcset` and `poster` attribute values.
///
/// Returns the locations sorted by `start`. At most `max_noscript_urls`
/// entries are returned; anything found after the array is full is dropped.
///
/// Limitations: attribute and tag names are matched case-sensitively, no
/// whitespace is allowed around `=`, and unquoted values are ignored.
fn findNoscriptUrls(content: []const u8) bun.BoundedArray(NoscriptUrlLocation, max_noscript_urls) {
    var urls: bun.BoundedArray(NoscriptUrlLocation, max_noscript_urls) = .{};

    const attributes = [_]NoscriptUrlAttribute{ .href, .src, .srcset, .poster };
    scan: for (attributes) |attribute| {
        var offset: usize = 0;
        while (nextQuotedNoscriptAttribute(content, offset, attribute.needle())) |match| {
            // Continue after the closing quote whether or not the value is kept.
            offset = match.value_end + 1;
            const kind = classifyNoscriptUrl(content, attribute, match.name_start) orelse continue;
            // The array is full: nothing else can be recorded, stop scanning.
            urls.append(.{ .start = match.value_start, .end = match.value_end, .kind = kind }) catch break :scan;
        }
    }

    // Sort by start position so callers can process the URLs in document order.
    std.mem.sort(NoscriptUrlLocation, urls.slice(), {}, struct {
        pub fn lessThan(_: void, a: NoscriptUrlLocation, b: NoscriptUrlLocation) bool {
            return a.start < b.start;
        }
    }.lessThan);

    return urls;
}
/// Text-chunk handler for `noscript` elements.
/// Scans the chunk for URLs with `findNoscriptUrls`, reports each one through
/// `T.onNoscriptUrl`, and, when `T` declares `rewriteNoscriptContent`, gives it
/// the chance to replace the chunk. Always returns false, whatever the rewrite
/// reports.
fn handleNoscriptText(this: *T, text_chunk: *lol.TextChunk) bool {
    const raw = text_chunk.getContent();
    if (raw.len == 0) return false;
    const content = raw.slice();

    var urls = findNoscriptUrls(content);
    if (urls.len == 0) return false;
    const locations = urls.slice();

    // Report every discovered URL, in document order.
    for (locations) |location| {
        const url = content[location.start..location.end];
        debug("noscript url: {s} kind={}", .{ url, location.kind });
        T.onNoscriptUrl(this, url, location.kind);
    }

    // Rewriting is optional; its result does not change what this handler returns.
    if (@hasDecl(T, "rewriteNoscriptContent")) {
        _ = T.rewriteNoscriptContent(this, content, locations, text_chunk);
    }
    return false;
}
fn generateHandlerForTag(comptime tag_info: TagHandler) fn (*T, *lol.Element) bool {
const Handler = struct {
pub fn handle(this: *T, element: *lol.Element) bool {
@@ -222,7 +402,8 @@ pub fn HTMLProcessor(
var builder = lol.HTMLRewriter.Builder.init();
defer builder.deinit();
var selectors: bun.BoundedArray(*lol.HTMLSelector, tag_handlers.len + if (visit_document_tags) 3 else 0) = .{};
// +1 for noscript handler
var selectors: bun.BoundedArray(*lol.HTMLSelector, tag_handlers.len + 1 + if (visit_document_tags) 3 else 0) = .{};
defer for (selectors.slice()) |selector| {
selector.deinit();
};
@@ -245,6 +426,26 @@ pub fn HTMLProcessor(
);
}
// Add noscript handler with text content handler to parse raw content
// The HTML parser treats noscript content as raw text (scripting flag enabled),
// so we use a text handler to capture and parse the content manually.
if (@hasDecl(T, "onNoscriptUrl")) {
const noscript_selector = try lol.HTMLSelector.parse("noscript");
selectors.appendAssumeCapacity(noscript_selector);
try builder.addElementContentHandlers(
noscript_selector,
void, // No element handler needed
null,
null,
void,
null,
null,
T,
handleNoscriptText,
this,
);
}
if (visit_document_tags) {
inline for (.{ "body", "head", "html" }, &.{ T.onBodyTag, T.onHeadTag, T.onHtmlTag }) |tag, cb| {
const head_selector = try lol.HTMLSelector.parse(tag);

View File

@@ -113,6 +113,98 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
}
}
/// Handle URLs found inside noscript elements (parsed from raw text content).
/// Intentionally a no-op: during the rewrite phase the replacement is done by
/// `rewriteNoscriptContent`. The declaration still has to exist, because
/// `HTMLProcessor` only registers its noscript handler when the type declares
/// `onNoscriptUrl`.
pub fn onNoscriptUrl(_: *@This(), _: []const u8, _: ImportKind) void {
// The actual URL replacement is handled by rewriteNoscriptContent
}
/// Rewrite noscript raw text by substituting each located URL.
///
/// Consumes one import record per entry of `url_locations`, in order, advancing
/// `current_import_record_index`, and panics if the records run out. The
/// rebuilt text replaces `text_chunk`.
///
/// Returns true when the chunk was replaced. Returns false when
/// `url_locations` is empty, or when an append or the replace call fails.
pub fn rewriteNoscriptContent(
    this: *@This(),
    content: []const u8,
    url_locations: anytype,
    text_chunk: *lol.TextChunk,
) bool {
    if (url_locations.len == 0) return false;

    var rewritten = std.array_list.Managed(u8).init(this.allocator);
    defer rewritten.deinit();

    // Offset in `content` up to which bytes have already been copied.
    var cursor: usize = 0;
    for (url_locations) |location| {
        if (this.current_import_record_index >= this.import_records.len) {
            Output.panic("Assertion failure in HTMLLoader.rewriteNoscriptContent: current_import_record_index ({d}) >= import_records.len ({d})", .{ this.current_import_record_index, this.import_records.len });
        }
        const record: *const ImportRecord = &this.import_records[this.current_import_record_index];
        this.current_import_record_index += 1;

        const has_source = record.source_index.isValid();
        const asset_key = if (has_source)
            this.linker.parse_graph.input_files.items(.unique_key_for_additional_file)[record.source_index.get()]
        else
            "";
        const loader: Loader = if (has_source)
            this.linker.parse_graph.input_files.items(.loader)[record.source_index.get()]
        else
            .file;

        // Copy the untouched text between the previous URL and this one.
        rewritten.appendSlice(content[cursor..location.start]) catch return false;

        // `null` means "keep the URL exactly as written in the source".
        const replacement: ?[]const u8 = pick: {
            // External imports stay as-is.
            if (record.flags.is_external_without_side_effects) break :pick null;

            if (this.linker.dev_server != null) {
                if (asset_key.len > 0) break :pick asset_key;
                // Disabled paths and CSS/JS keep their original path under the dev server.
                if (record.path.is_disabled or loader.isJavaScriptLike() or loader.isCSS()) break :pick null;
                break :pick record.path.pretty;
            }

            if (record.source_index.isInvalid()) break :pick null;

            if (loader.isJavaScriptLike() or loader.isCSS()) {
                // CSS points at the HTML chunk's CSS chunk when there is one;
                // JS inside noscript is left alone.
                if (loader.isCSS()) {
                    if (this.chunk.getCSSChunkForHTML(this.chunks)) |css_chunk| break :pick css_chunk.unique_key;
                }
                break :pick null;
            }

            // Other assets (images, etc.) use their unique key when they have one.
            if (asset_key.len > 0) break :pick asset_key;
            break :pick null;
        };

        rewritten.appendSlice(replacement orelse content[location.start..location.end]) catch return false;
        cursor = location.end;
    }

    // Copy whatever follows the last URL.
    rewritten.appendSlice(content[cursor..]) catch return false;

    // `true` = treat the replacement as HTML so it is not escaped.
    text_chunk.replace(rewritten.items, true) catch return false;
    return true;
}
pub fn onHeadTag(this: *@This(), element: *lol.Element) bool {
element.onEndTag(endHeadTagHandler, this) catch return true;
return false;

View File

@@ -843,4 +843,107 @@ body {
api.expectFile("out/" + jsFile).toContain("sourceMappingURL");
},
});
// Test that resources inside <noscript> tags are bundled - issue #25618
// Fixture: a stylesheet <link> inside a <noscript> in <head>, a regular
// stylesheet <link> next to it, and an <img> inside a <noscript> in <body>.
itBundled("html/noscript-resources", {
outdir: "out/",
files: {
"/index.html": `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<noscript><link rel="stylesheet" href="noscript.css"></noscript>
<link rel="stylesheet" href="main.css">
</head>
<body>
<p>Hello, World!</p>
<noscript>
<img src="fallback.jpg" alt="Fallback image">
</noscript>
</body>
</html>`,
"/noscript.css": `
p {
color: red;
}`,
"/main.css": `
body {
margin: 0;
}`,
"/fallback.jpg": "fake image content",
},
entryPoints: ["/index.html"],
onAfterBundle(api) {
const htmlContent = api.readFile("out/index.html");
// Neither original stylesheet path may survive; some rewritten .css href must be present
expect(htmlContent).not.toContain('href="noscript.css"');
expect(htmlContent).not.toContain('href="main.css"');
expect(htmlContent).toMatch(/href=".*\.css"/);
// The noscript image path must be rewritten and still sit inside <noscript>
expect(htmlContent).not.toContain('src="fallback.jpg"');
expect(htmlContent).toMatch(/<noscript>\s*<img src=".*\.jpg"/);
// Verify at least one .css href is referenced (this does not require two distinct files)
const cssMatches = htmlContent.match(/href="([^"]*\.css)"/g);
expect(cssMatches).not.toBeNull();
expect(cssMatches!.length).toBeGreaterThanOrEqual(1);
// The first referenced CSS file must contain rules from both stylesheets
// ("color:" from noscript.css, "margin:" from main.css)
const cssPath = htmlContent.match(/href="([^"]*\.css)"/)?.[1];
const cssBundle = api.readFile("out/" + cssPath!);
expect(cssBundle).toContain("color:");
expect(cssBundle).toContain("margin:");
},
});
// Test noscript with script tags - issue #25618
// Fixture: a regular <script src> in <head> followed by a <noscript> that
// holds a stylesheet <link>.
itBundled("html/noscript-script", {
outdir: "out/",
files: {
"/index.html": `
<!DOCTYPE html>
<html>
<head>
<script src="main.js"></script>
<noscript>
<link rel="stylesheet" href="nojs-styles.css">
</noscript>
</head>
<body>
<div id="app"></div>
</body>
</html>`,
"/main.js": `console.log('App loaded');`,
"/nojs-styles.css": `
#app {
display: none;
}
.no-js-message {
display: block;
}`,
},
entryPoints: ["/index.html"],
onAfterBundle(api) {
const htmlContent = api.readFile("out/index.html");
// Main JS should be bundled: original path gone, some .js src present
expect(htmlContent).not.toContain('src="main.js"');
expect(htmlContent).toMatch(/src=".*\.js"/);
// Noscript CSS should be bundled: original path gone
expect(htmlContent).not.toContain('href="nojs-styles.css"');
// Get the CSS file referenced by the <link> that directly follows <noscript>
const noscriptMatch = htmlContent.match(/<noscript>\s*<link[^>]*href="([^"]*\.css)"/);
expect(noscriptMatch).not.toBeNull();
// That file must contain both selectors from nojs-styles.css
const cssBundle = api.readFile("out/" + noscriptMatch![1]);
expect(cssBundle).toContain("#app");
expect(cssBundle).toContain(".no-js-message");
},
});
});

View File

@@ -0,0 +1,135 @@
// https://github.com/oven-sh/bun/issues/25618
// HTML bundler ignores resources inside <noscript> tags
import { expect, test } from "bun:test";
import { existsSync, readdirSync, readFileSync } from "fs";
import { bunEnv, bunExe, tempDir } from "harness";
import { join } from "path";
test("bun build bundles CSS inside noscript tags - issue #25618", async () => {
  // Fixture: the only stylesheet is referenced from a <noscript> in <head>.
  using dir = tempDir("25618-noscript-css", {
    "index.html": `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<noscript><link rel="stylesheet" href="noscript.css"></noscript>
</head>
<body>
<p>Hello, World!</p>
</body>
</html>`,
    "noscript.css": `p {
color: red;
}`,
  });
  const cwd = String(dir);
  const outputDir = join(cwd, "output");

  await using proc = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
    env: bunEnv,
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Drain both pipes while waiting for exit; only the exit code is asserted.
  const [, , exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
  expect(exitCode).toBe(0);

  // The build must have produced the output directory...
  expect(existsSync(outputDir)).toBe(true);

  // ...containing at least one CSS file.
  const cssFiles = readdirSync(outputDir).filter((name: string) => name.endsWith(".css"));
  expect(cssFiles.length).toBeGreaterThan(0);

  // The HTML must no longer use the original path and must reference some .css href.
  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
  expect(htmlContent).not.toContain('href="noscript.css"');
  expect(htmlContent).toMatch(/href="[^"]*\.css"/);

  // The emitted CSS carries the rule from noscript.css.
  const cssContent = readFileSync(join(outputDir, cssFiles[0]), "utf-8");
  expect(cssContent).toContain("color:");
});
test("bun build bundles images inside noscript tags - issue #25618", async () => {
  // Fixture: an <img> that only exists inside a <noscript> in <body>.
  using dir = tempDir("25618-noscript-img", {
    "index.html": `<!DOCTYPE html>
<html>
<body>
<noscript>
<img src="fallback.png" alt="Fallback">
</noscript>
</body>
</html>`,
    "fallback.png": "fake png content",
  });
  const cwd = String(dir);
  const outputDir = join(cwd, "output");

  await using proc = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
    env: bunEnv,
    cwd,
    stdout: "pipe",
    stderr: "pipe",
  });

  // Drain both pipes while waiting for exit; only the exit code is asserted.
  const [, , exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
  expect(exitCode).toBe(0);

  // The build must have produced the output directory...
  expect(existsSync(outputDir)).toBe(true);

  // ...containing at least one emitted .png file.
  const imgFiles = readdirSync(outputDir).filter((name: string) => name.endsWith(".png"));
  expect(imgFiles.length).toBeGreaterThan(0);

  // The HTML must no longer use the original path and must reference some .png src.
  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
  expect(htmlContent).not.toContain('src="fallback.png"');
  expect(htmlContent).toMatch(/src="[^"]*\.png"/);
});
// The fixture contains no <script>: it pairs a <noscript> meta refresh in
// <head> (nothing to bundle) with a <noscript> stylesheet link in <body>.
// The title states that, and the href is checked to be rewritten to a
// bundled CSS path, as in the sibling CSS test.
test("bun build bundles CSS inside body noscript next to a head noscript meta refresh - issue #25618", async () => {
  using dir = tempDir("25618-noscript-script", {
    "index.html": `<!DOCTYPE html>
<html>
<head>
<noscript>
<meta http-equiv="refresh" content="0; url=nojs.html">
</noscript>
</head>
<body>
<noscript>
<link rel="stylesheet" href="nojs.css">
</noscript>
</body>
</html>`,
    "nojs.css": `body { background: yellow; }`,
  });

  await using proc = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir", "./output"],
    env: bunEnv,
    cwd: String(dir),
    stdout: "pipe",
    stderr: "pipe",
  });

  // Drain both pipes while waiting for exit; only the exit code is asserted.
  const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
  expect(exitCode).toBe(0);

  // Check the directory first so a missing output fails with a clear assertion
  // instead of a readdirSync exception.
  const outputDir = join(String(dir), "output");
  expect(existsSync(outputDir)).toBe(true);

  const files = readdirSync(outputDir);
  const cssFiles = files.filter((f: string) => f.endsWith(".css"));
  expect(cssFiles.length).toBeGreaterThan(0);

  // The original path must be gone AND replaced by a bundled .css href.
  const htmlContent = readFileSync(join(outputDir, "index.html"), "utf-8");
  expect(htmlContent).not.toContain('href="nojs.css"');
  expect(htmlContent).toMatch(/href="[^"]*\.css"/);
});