Compare commits

...

2 Commits

Author SHA1 Message Date
Claude Bot
c057f444e1 fix: address review comments
- Use std.mem.startsWith for data- prefix check
- Add < and > escaping in attribute values
- Reorder test assertions (content before exit code)
- Test exact escaped JSON value
- Add negative assertion for merge test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 09:39:41 +00:00
Claude Bot
6e0a60fac6 fix(bundler): preserve data-* attributes on script and link tags in HTML bundler
When bundling HTML files, the bundler was removing script and link tags
that reference JS/CSS files and injecting new tags for the bundled output.
However, this process was stripping all custom attributes including data-*
attributes from the original elements.

This fix captures data-* attributes from the first bundled script/link
element of each type and includes them on the corresponding injected tag
in the output HTML.

Fixes #26216

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 09:28:34 +00:00
2 changed files with 216 additions and 2 deletions

View File

@@ -49,6 +49,10 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
html: ?u32 = 0,
},
added_head_tags: bool,
/// Stores data-* attributes from the first bundled JS element
js_data_attrs: ?[]const u8,
/// Stores data-* attributes from the first bundled CSS element
css_data_attrs: ?[]const u8,
pub fn onWriteHTML(this: *@This(), bytes: []const u8) void {
bun.handleOom(this.output.appendSlice(bytes));
@@ -100,6 +104,12 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
}
if (loader.isJavaScriptLike() or loader.isCSS()) {
// Capture data-* attributes from the first bundled element of each type
if (loader.isJavaScriptLike() and this.js_data_attrs == null) {
this.js_data_attrs = collectDataAttributes(element, this.allocator);
} else if (loader.isCSS() and this.css_data_attrs == null) {
this.css_data_attrs = collectDataAttributes(element, this.allocator);
}
// Remove the original non-external tags
element.remove();
return;
@@ -146,12 +156,22 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
var array: bun.BoundedArray([]const u8, 2) = .{};
// Put CSS before JS to reduce the chance of a flash of unstyled content
if (this.chunk.getCSSChunkForHTML(this.chunks)) |css_chunk| {
const link_tag = bun.handleOom(std.fmt.allocPrintSentinel(allocator, "<link rel=\"stylesheet\" crossorigin href=\"{s}\">", .{css_chunk.unique_key}, 0));
const data_attrs = this.css_data_attrs orelse "";
const link_tag = bun.handleOom(std.fmt.allocPrintSentinel(allocator, "<link rel=\"stylesheet\" crossorigin href=\"{s}\"{s}{s}>", .{
css_chunk.unique_key,
if (data_attrs.len > 0) " " else "",
data_attrs,
}, 0));
array.appendAssumeCapacity(link_tag);
}
if (this.chunk.getJSChunkForHTML(this.chunks)) |js_chunk| {
// type="module" scripts do not block rendering, so it is okay to put them in head
const script = bun.handleOom(std.fmt.allocPrintSentinel(allocator, "<script type=\"module\" crossorigin src=\"{s}\"></script>", .{js_chunk.unique_key}, 0));
const data_attrs = this.js_data_attrs orelse "";
const script = bun.handleOom(std.fmt.allocPrintSentinel(allocator, "<script type=\"module\" crossorigin src=\"{s}\"{s}{s}></script>", .{
js_chunk.unique_key,
if (data_attrs.len > 0) " " else "",
data_attrs,
}, 0));
array.appendAssumeCapacity(script);
}
return array;
@@ -209,6 +229,8 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
.head = null,
},
.added_head_tags = false,
.js_data_attrs = null,
.css_data_attrs = null,
};
HTMLScanner.HTMLProcessor(HTMLLoader, true).run(
@@ -253,6 +275,57 @@ fn generateCompileResultForHTMLChunkImpl(worker: *ThreadPool.Worker, c: *LinkerC
} };
}
/// Collects every `data-*` attribute of `element` into one string of
/// space-separated `name="value"` pairs, ready to be spliced into a tag.
///
/// Names are copied through unchanged. Values are always written inside double
/// quotes, with `"`, `&`, `<` and `>` replaced by `&quot;`, `&amp;`, `&lt;` and
/// `&gt;`. An attribute with an empty value is written as `name=""`.
///
/// Returns null when `element.attributes()` yields no iterator or when no
/// attribute name starts with `data-`. Otherwise the returned slice is
/// allocated from `allocator` and ownership passes to the caller.
/// Allocation failure is routed through `bun.handleOom`, matching the rest of
/// this file.
fn collectDataAttributes(element: *const lol.Element, allocator: std.mem.Allocator) ?[]const u8 {
    const attr_iter = element.attributes() orelse return null;
    defer attr_iter.deinit();

    var result = std.array_list.Managed(u8).init(allocator);

    while (attr_iter.next()) |attr| {
        const attr_name = attr.name();
        defer attr_name.deinit();
        const name_slice = attr_name.slice();

        // Only attributes whose name starts with "data-" are carried over.
        if (!std.mem.startsWith(u8, name_slice, "data-")) continue;

        const attr_value = attr.value();
        defer attr_value.deinit();

        // A non-empty buffer means at least one pair was already written, so
        // this pair needs a separating space.
        if (result.items.len > 0) bun.handleOom(result.append(' '));

        bun.handleOom(result.appendSlice(name_slice));
        bun.handleOom(result.appendSlice("=\""));

        // Escape the value for a double-quoted HTML attribute context.
        // NOTE(review): this assumes attr.value() yields the entity-decoded
        // value. If it returns the raw source text, entities already present
        // in the input (e.g. `&amp;`) would be escaped a second time — confirm
        // against lol-html.
        for (attr_value.slice()) |c| {
            switch (c) {
                '"' => bun.handleOom(result.appendSlice("&quot;")),
                '&' => bun.handleOom(result.appendSlice("&amp;")),
                '<' => bun.handleOom(result.appendSlice("&lt;")),
                '>' => bun.handleOom(result.appendSlice("&gt;")),
                else => bun.handleOom(result.append(c)),
            }
        }

        bun.handleOom(result.append('"'));
    }

    if (result.items.len == 0) {
        result.deinit();
        return null;
    }

    return bun.handleOom(result.toOwnedSlice());
}
pub const DeferredBatchTask = bun.bundle_v2.DeferredBatchTask;
pub const ThreadPool = bun.bundle_v2.ThreadPool;
pub const ParseTask = bun.bundle_v2.ParseTask;

View File

@@ -0,0 +1,141 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
// Regression test: a bundled <script> tag keeps the data-* attributes of the
// original tag, including a valueless one.
test("bun build preserves data-* attributes on script tags", async () => {
  // Fixture project: one HTML page referencing one module script that carries
  // a valueless data attribute and a valued one.
  using fixture = tempDir("issue-26216", {
    "index.html": `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script type="module" src="./app.js" data-inline data-custom="value"></script>
</head>
<body>
<div id="app"></div>
</body>
</html>`,
    "app.js": `console.log("hello");`,
  });

  await using build = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir=out"],
    env: bunEnv,
    cwd: String(fixture),
    stderr: "pipe",
    stdout: "pipe",
  });

  // Read both pipes to completion while waiting for the process to exit; only
  // the exit code is asserted on below.
  const settled = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]);
  const exitCode = settled[2];

  const html = await Bun.file(`${fixture}/out/index.html`).text();

  // Content assertions come before the exit-code assertion.
  // The valueless attribute is expected with an empty quoted value.
  expect(html).toContain('data-inline=""');
  expect(html).toContain('data-custom="value"');
  expect(exitCode).toBe(0);
});
// Regression test: a bundled stylesheet <link> tag keeps the data-* attributes
// of the original tag.
test("bun build preserves data-* attributes on link tags", async () => {
  // Fixture project: one HTML page referencing one stylesheet whose <link>
  // carries two data attributes.
  using fixture = tempDir("issue-26216-link", {
    "index.html": `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<link href="./style.css" rel="stylesheet" data-theme="dark" data-priority="high">
</head>
<body>
<div id="app"></div>
</body>
</html>`,
    "style.css": `body { color: red; }`,
  });

  await using build = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir=out"],
    env: bunEnv,
    cwd: String(fixture),
    stderr: "pipe",
    stdout: "pipe",
  });

  // Read both pipes to completion while waiting for the process to exit; only
  // the exit code is asserted on below.
  const settled = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]);
  const exitCode = settled[2];

  const html = await Bun.file(`${fixture}/out/index.html`).text();

  // Both data attributes must appear in the emitted HTML.
  expect(html).toContain('data-theme="dark"');
  expect(html).toContain('data-priority="high"');
  expect(exitCode).toBe(0);
});
// Regression test: a data-* value containing double quotes is kept, with the
// quotes escaped, on the bundled <script> tag.
test("bun build preserves data-* attributes with special characters", async () => {
  // Fixture project: the script tag carries a single-quoted JSON value that
  // contains double quotes.
  using fixture = tempDir("issue-26216-special", {
    "index.html": `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script type="module" src="./app.js" data-config='{"key":"value"}'></script>
</head>
<body>
<div id="app"></div>
</body>
</html>`,
    "app.js": `console.log("hello");`,
  });

  await using build = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir=out"],
    env: bunEnv,
    cwd: String(fixture),
    stderr: "pipe",
    stdout: "pipe",
  });

  // Read both pipes to completion while waiting for the process to exit; only
  // the exit code is asserted on below.
  const settled = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]);
  const exitCode = settled[2];

  const html = await Bun.file(`${fixture}/out/index.html`).text();

  // The value is expected re-quoted with double quotes, with each inner double
  // quote written as &quot;.
  expect(html).toContain('data-config="{&quot;key&quot;:&quot;value&quot;}"');
  expect(exitCode).toBe(0);
});
// Regression test: when the page has two module scripts, only the first
// script's data-* attributes are expected in the output HTML.
test("bun build uses data-* attributes from first bundled element when merging multiple scripts", async () => {
  // Fixture project: two module scripts, each tagged with a different data
  // attribute.
  using fixture = tempDir("issue-26216-merge", {
    "index.html": `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script type="module" src="./app1.js" data-first="true"></script>
<script type="module" src="./app2.js" data-second="true"></script>
</head>
<body>
<div id="app"></div>
</body>
</html>`,
    "app1.js": `console.log("app1");`,
    "app2.js": `console.log("app2");`,
  });

  await using build = Bun.spawn({
    cmd: [bunExe(), "build", "./index.html", "--outdir=out"],
    env: bunEnv,
    cwd: String(fixture),
    stderr: "pipe",
    stdout: "pipe",
  });

  // Read both pipes to completion while waiting for the process to exit; only
  // the exit code is asserted on below.
  const settled = await Promise.all([build.stdout.text(), build.stderr.text(), build.exited]);
  const exitCode = settled[2];

  const html = await Bun.file(`${fixture}/out/index.html`).text();

  // The first script's attribute must be present in the output...
  expect(html).toContain('data-first="true"');
  // ...and the second script's attribute must not be.
  expect(html).not.toContain('data-second="true"');
  expect(exitCode).toBe(0);
});