mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
309 lines
10 KiB
Zig
309 lines
10 KiB
Zig
// This file is itself the `HTMLScanner` struct: it walks an HTML document and
// records every URL-bearing attribute it matches as an import record.
const HTMLScanner = @This();
|
|
|
|
/// Used to copy each import path and to grow `import_records`; `deinit`
/// releases both through the same allocator.
allocator: std.mem.Allocator,
|
|
/// Records appended by `createImportRecord`, one per `onTag` callback.
import_records: ImportRecord.List = .{},
|
|
/// Receives the errors reported through `onHTMLParseError`.
log: *logger.Log,
|
|
/// The HTML source being scanned. Its path is used to resolve extension-bearing
/// references that lack a leading "./", and it is attached to logged errors.
source: *const logger.Source,
|
|
|
|
/// Creates a scanner with an empty import-record list.
///
/// `allocator` backs every import record and path copy made while scanning,
/// `log` receives parse errors, and `source` is the HTML file being scanned.
/// Pair with `deinit`.
pub fn init(allocator: std.mem.Allocator, log: *logger.Log, source: *const logger.Source) HTMLScanner {
    const scanner: HTMLScanner = .{
        .source = source,
        .log = log,
        .import_records = .{},
        .allocator = allocator,
    };
    return scanner;
}
|
|
|
|
/// Frees the path text owned by every collected import record, then the record
/// list itself.
///
/// `createImportRecord` allocates each path with `dupeZ`, i.e. `len + 1` bytes
/// (the text plus a NUL terminator), while the record only exposes the text as
/// `path.text`. `Allocator.free` must be given a slice that covers the whole
/// allocation, so the terminator is re-attached before freeing. This also
/// releases the one-byte allocation behind an empty path, which a plain
/// zero-length slice would skip.
///
/// NOTE(review): assumes every record in `import_records` was produced by
/// `createImportRecord` and that `path.text` still points at that allocation —
/// confirm no caller swaps in differently-allocated text before `deinit`.
pub fn deinit(this: *HTMLScanner) void {
    for (this.import_records.slice()) |*record| {
        const text = record.path.text;
        // `[0..len :0]` restores the sentinel-terminated view so the freed
        // length matches what `dupeZ` allocated.
        this.allocator.free(text.ptr[0..text.len :0]);
    }
    this.import_records.deinitWithAllocator(this.allocator);
}
|
|
|
|
/// Appends an import record of `kind` for a URL found in the document.
///
/// The path stored in the record is chosen as follows:
///   * `/src/index.js`-style references (leading `/`, length > 1) are joined
///     onto the project's top-level directory instead of being treated as
///     absolute filesystem paths.
///   * References that pass the "no leading ./" heuristic and end in a short
///     extension (at most 4 bytes including the dot) are joined onto the
///     directory of the HTML source; that joined path is used only if it
///     exists on disk.
///   * Anything else is stored verbatim.
///
/// The chosen path is copied with `dupeZ`, so the record owns its text.
/// Returns an error if the copy or the list append fails; in that case nothing
/// is leaked and no record is added.
fn createImportRecord(this: *HTMLScanner, input_path: []const u8, kind: ImportKind) !void {
    const path_to_use: []const u8 = resolve: {
        // In HTML, sometimes people do /src/index.js
        // In that case, we don't want to use the absolute filesystem path, we want to use the path relative to the project root
        if (input_path.len > 1 and input_path[0] == '/') {
            break :resolve bun.path.joinAbsString(
                bun.fs.FileSystem.instance.top_level_dir,
                &[_][]const u8{input_path[1..]},
                .auto,
            );
        }

        // Check if imports to (e.g) "App.tsx" are actually relative imports w/o the "./"
        if (input_path.len > 2 and input_path[0] != '.' and input_path[1] != '/') {
            const index_of_dot = std.mem.lastIndexOfScalar(u8, input_path, '.') orelse break :resolve input_path;
            const ext = input_path[index_of_dot..];
            // Only short extensions (e.g. ".tsx") qualify for the relative lookup.
            if (ext.len > 4) break :resolve input_path;

            // /foo/bar/index.html -> /foo/bar
            const dirname: []const u8 = std.fs.path.dirname(this.source.path.text) orelse break :resolve input_path;
            const resolved = bun.path.joinAbsString(dirname, &[_][]const u8{input_path}, .auto);
            if (bun.sys.exists(resolved)) break :resolve resolved;
        }

        break :resolve input_path;
    };

    const owned_text = try this.allocator.dupeZ(u8, path_to_use);
    // If the append below fails, the copy would otherwise be unreachable and leak.
    errdefer this.allocator.free(owned_text);

    try this.import_records.push(this.allocator, ImportRecord{
        .path = fs.Path.init(owned_text),
        .kind = kind,
        .range = logger.Range.None,
    });
}
|
|
|
|
// Scoped debug logger tagged `.HTMLScanner`; the generated tag handlers use it
// to trace each matched selector together with the attribute value found.
const debug = bun.Output.scoped(.HTMLScanner, true);
|
|
|
|
/// Output callback handed to the rewriter by `HTMLProcessor.run`.
/// Scanning only collects import records, so the chunk is discarded.
pub fn onWriteHTML(_: *HTMLScanner, bytes: []const u8) void {
    // Nothing is written during the scan phase.
    _ = bytes;
}
|
|
|
|
/// Records an HTML parse failure on the scanner's log.
///
/// The error is attached to `this.source` with an empty location. Failing to
/// allocate the log entry is treated as fatal via `bun.outOfMemory()`.
pub fn onHTMLParseError(this: *HTMLScanner, message: []const u8) void {
    const location = logger.Loc.Empty;
    this.log.addError(this.source, location, message) catch bun.outOfMemory();
}
|
|
|
|
/// Visitor callback invoked by the generated tag handlers for every matched
/// element whose URL attribute is non-empty.
///
/// `path` is the raw attribute value and `kind` the import kind configured for
/// the matching selector. The element and the attribute name are not needed
/// while scanning and are ignored.
///
/// Creating the record can only fail on allocation. Dropping the record
/// silently would leave the scan result incomplete with no diagnostic, so the
/// failure is treated as fatal, matching `onHTMLParseError`.
pub fn onTag(this: *HTMLScanner, _: *lol.Element, path: []const u8, url_attribute: []const u8, kind: ImportKind) void {
    _ = url_attribute;
    this.createImportRecord(path, kind) catch bun.outOfMemory();
}
|
|
|
|
// Processor specialised for this scanner. `false` turns off the html/head/body
// visitors, whose callbacks HTMLScanner does not define in this file.
const processor = HTMLProcessor(HTMLScanner, false);
|
|
|
|
/// Scans `input` as HTML and appends an import record for every URL attribute
/// matched by the processor's selectors. Any error from the processor is
/// returned to the caller unchanged.
pub fn scan(this: *HTMLScanner, input: []const u8) !void {
    return processor.run(this, input);
}
|
|
|
|
/// Builds a processor type that runs `input` HTML through a lol-html rewriter
/// and reports URL-bearing elements to a visitor of type `T`.
///
/// `T` is expected to provide the declarations this code calls:
///   * `onTag(*T, *lol.Element, value, url_attribute, kind)` — called for each
///     matched element whose URL attribute is non-empty.
///   * `onWriteHTML` — passed to the rewriter as its output callback.
///   * `onHTMLParseError(*T, message)` — called when `run` fails and lol-html
///     has a pending error message.
///   * `onBodyTag`, `onHeadTag`, `onHtmlTag` — only when `visit_document_tags`
///     is true.
pub fn HTMLProcessor(
    comptime T: type,
    /// If the visitor should visit html, head, body
    comptime visit_document_tags: bool,
) type {
    return struct {
        /// Describes one selector and how a match is reported to the visitor.
        const TagHandler = struct {
            /// CSS selector to match elements
            selector: []const u8,
            /// Whether this tag can have text content that needs to be processed.
            /// Not read by the code in this type.
            has_content: bool = false,
            /// The attribute to extract the URL from
            url_attribute: []const u8,
            /// The kind of import to create
            kind: ImportKind,

            /// Not read by the code in this type.
            is_head_or_html: bool = false,
        };

        /// Every selector the processor registers, in registration order.
        const tag_handlers = [_]TagHandler{
            // Scripts with src
            .{
                .selector = "script[src]",
                .has_content = false,
                .url_attribute = "src",
                .kind = .stmt,
            },
            // CSS Stylesheets
            .{
                .selector = "link[rel='stylesheet'][href]",
                .url_attribute = "href",
                .kind = .at,
            },

            // CSS Assets
            .{
                .selector = "link[as='style'][href]",
                .url_attribute = "href",
                .kind = .at,
            },
            // Font files
            .{
                .selector = "link[as='font'][href], link[type^='font/'][href]",
                .url_attribute = "href",
                .kind = .url,
            },
            // Image assets
            .{
                .selector = "link[as='image'][href]",
                .url_attribute = "href",
                .kind = .url,
            },
            // Audio/Video assets
            .{
                .selector = "link[as='video'][href], link[as='audio'][href]",
                .url_attribute = "href",
                .kind = .url,
            },
            // Web Workers
            .{
                .selector = "link[as='worker'][href]",
                .url_attribute = "href",
                .kind = .stmt,
            },
            // Manifest files
            .{
                .selector = "link[rel='manifest'][href]",
                .url_attribute = "href",
                .kind = .url,
            },
            // Icons
            .{
                .selector = "link[rel='icon'][href], link[rel='apple-touch-icon'][href]",
                .url_attribute = "href",
                .kind = .url,
            },
            // Images with src
            .{
                .selector = "img[src]",
                .url_attribute = "src",
                .kind = .url,
            },
            // Images with srcset
            .{
                .selector = "img[srcset]",
                .url_attribute = "srcset",
                .kind = .url,
            },
            // Videos with src
            .{
                .selector = "video[src]",
                .url_attribute = "src",
                .kind = .url,
            },
            // Videos with poster
            .{
                .selector = "video[poster]",
                .url_attribute = "poster",
                .kind = .url,
            },
            // Audio with src
            .{
                .selector = "audio[src]",
                .url_attribute = "src",
                .kind = .url,
            },
            // Source elements with src
            .{
                .selector = "source[src]",
                .url_attribute = "src",
                .kind = .url,
            },
            // Source elements with srcset
            .{
                .selector = "source[srcset]",
                .url_attribute = "srcset",
                .kind = .url,
            },
            // // Iframes
            // .{
            //     .selector = "iframe[src]",
            //     .url_attribute = "src",
            //     .kind = .url,
            // },
        };

        /// Produces the element callback for one `tag_handlers` entry. The
        /// callback reads the configured attribute and, when it is present and
        /// non-empty, forwards its value to `T.onTag`.
        fn generateHandlerForTag(comptime tag_info: TagHandler) fn (*T, *lol.Element) bool {
            const Handler = struct {
                pub fn handle(this: *T, element: *lol.Element) bool {
                    // Handle URL attribute if present
                    if (tag_info.url_attribute.len > 0) {
                        // A failed attribute lookup is treated the same as "attribute absent".
                        if (element.hasAttribute(tag_info.url_attribute) catch false) {
                            const value = element.getAttribute(tag_info.url_attribute);
                            defer value.deinit();
                            if (value.len > 0) {
                                debug("{s} {s}", .{ tag_info.selector, value.slice() });
                                T.onTag(this, element, value.slice(), tag_info.url_attribute, tag_info.kind);
                            }
                        }
                    }
                    // NOTE(review): presumably `false` tells the binding to keep
                    // processing — confirm against the lol-html wrapper.
                    return false;
                }
            };
            return Handler.handle;
        }

        /// Runs `input` through a rewriter configured with every entry of
        /// `tag_handlers` (plus body/head/html handlers when
        /// `visit_document_tags` is set), invoking the visitor callbacks on
        /// `this` as elements are matched.
        ///
        /// On failure after the rewriter settings are prepared, any pending
        /// lol-html error message is forwarded to `this.onHTMLParseError`
        /// before the error is returned.
        pub fn run(this: *T, input: []const u8) !void {
            var builder = lol.HTMLRewriter.Builder.init();
            defer builder.deinit();

            // Sized at comptime for exactly the selectors registered below, so
            // `appendAssumeCapacity` cannot overflow.
            var selectors: std.BoundedArray(*lol.HTMLSelector, tag_handlers.len + if (visit_document_tags) 3 else 0) = .{};
            defer for (selectors.slice()) |selector| {
                selector.deinit();
            };

            // Add handlers for each tag type
            inline for (tag_handlers) |tag_info| {
                const selector = try lol.HTMLSelector.parse(tag_info.selector);
                selectors.appendAssumeCapacity(selector);
                try builder.addElementContentHandlers(
                    selector,
                    T,
                    comptime generateHandlerForTag(tag_info),
                    this,
                    void,
                    null,
                    null,
                    void,
                    null,
                    null,
                );
            }

            if (visit_document_tags) {
                inline for (.{ "body", "head", "html" }, &.{ T.onBodyTag, T.onHeadTag, T.onHtmlTag }) |tag, cb| {
                    const head_selector = try lol.HTMLSelector.parse(tag);
                    selectors.appendAssumeCapacity(head_selector);
                    try builder.addElementContentHandlers(
                        head_selector,
                        T,
                        cb,
                        this,
                        void,
                        null,
                        null,
                        void,
                        null,
                        null,
                    );
                }
            }

            // Upper bound on the memory the rewriter may use (10 MiB).
            const max_allowed_memory_usage: usize = 1024 * 1024 * 10;
            const memory_settings = lol.MemorySettings{
                // Start with a quarter of the input (at least 1 KiB), but never
                // ask for more up-front than the rewriter is allowed to use in
                // total: for inputs above 40 MiB the unclamped value would
                // exceed `max_allowed_memory_usage`.
                .preallocated_parsing_buffer_size = @min(@max(input.len / 4, 1024), max_allowed_memory_usage),
                .max_allowed_memory_usage = max_allowed_memory_usage,
            };

            // From here on, surface lol-html's own error text to the visitor
            // whenever this function returns an error.
            errdefer {
                const last_error = lol.HTMLString.lastError();
                defer last_error.deinit();

                if (last_error.len > 0) {
                    this.onHTMLParseError(last_error.slice());
                }
            }

            var rewriter = try builder.build(
                .UTF8,
                memory_settings,
                // NOTE(review): presumably lol-html's `strict` flag — confirm
                // against the binding.
                false,
                T,
                this,
                T.onWriteHTML,
                // No work is needed when the rewriter finishes.
                struct {
                    fn done(_: *T) void {}
                }.done,
            );
            defer rewriter.deinit();

            try rewriter.write(input);
            try rewriter.end();
        }
    };
}
|
|
|
|
const lol = @import("./deps/lol-html.zig");
|
|
const std = @import("std");
|
|
|
|
const ImportKind = @import("./import_record.zig").ImportKind;
|
|
const ImportRecord = @import("./import_record.zig").ImportRecord;
|
|
|
|
const bun = @import("bun");
|
|
const fs = bun.fs;
|
|
const logger = bun.logger;
|