diff --git a/src/js_ast.zig b/src/js_ast.zig
index 321c69c33c..c362e74864 100644
--- a/src/js_ast.zig
+++ b/src/js_ast.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const logger = @import("logger.zig");
+const JSXRuntime = @import("options.zig").JSX.Runtime;
 usingnamespace @import("strings.zig");
 usingnamespace @import("ast/base.zig");
@@ -60,6 +61,9 @@ pub const AssignTarget = enum {
 pub const LocRef = struct { loc: logger.Loc, ref: ?Ref };

 pub const Flags = struct {
+    pub const JSXElement = packed struct {
+        is_key_before_rest: bool = false,
+    };

     // Instead of 5 bytes for booleans, we can store it in 5 bits
     // It will still round up to 1 byte. But that's 4 bytes less!
@@ -704,6 +708,9 @@ pub const E = struct {
         // if they have side effects.
         can_be_unwrapped_if_unused: bool = false,

+        // Used when printing to generate the source prop on the fly
+        was_jsx_element: bool = false,
+
         pub fn hasSameFlagsAs(a: *Call, b: *Call) bool {
             return (a.optional_chain == b.optional_chain and
                 a.is_direct_eval == b.is_direct_eval and
@@ -810,10 +817,56 @@ pub const E = struct {
         ref: Ref,
     };

+    /// In development mode, the new JSX transform has a few special props:
+    /// - `React.jsxDEV(type, arguments, key, isStaticChildren, source, self)`
+    /// - `arguments`:
+    ///   ```{ ...props, children: children }```
+    /// - `source`: https://github.com/babel/babel/blob/ef87648f3f05ccc393f89dea7d4c7c57abf398ce/packages/babel-plugin-transform-react-jsx-source/src/index.js#L24-L48
+    ///   ```{
+    ///     fileName: string | null,
+    ///     columnNumber: number | null,
+    ///     lineNumber: number | null,
+    ///   }```
+    /// - `children`:
+    ///   - multiple children? the function is React.jsxsDEV, "jsxs" instead of "jsx"
+    ///   - one child? the function is React.jsxDEV
+    ///   - no children? the function is React.jsxDEV and children is an empty array
+    /// `isStaticChildren`: https://github.com/facebook/react/blob/4ca62cac45c288878d2532e5056981d177f9fdac/packages/react/src/jsx/ReactJSXElementValidator.js#L369-L384
+    /// This flag means children is an array of JSX element literals.
+    /// The documentation on this is sparse, but it appears that
+    /// React just calls Object.freeze on the children array.
+    /// Object.freeze, historically, is quite a bit slower[0] than just not doing that.
+    /// Given that, I am choosing to always pass "false" to this.
+    /// This also skips extra state that we'd need to track.
+    /// If React Fast Refresh ends up using this later, then we can revisit this decision.
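+    ///
+    /// A sketch of the output, assuming React's documented dev runtime (the
+    /// import specifier, file name, and positions below are hypothetical):
+    /// `<div id="a">hi</div>` compiles to roughly
+    /// ```
+    /// import { jsxDEV } from "react/jsx-dev-runtime";
+    /// jsxDEV(
+    ///   "div",
+    ///   { id: "a", children: "hi" }, // props, with children folded in
+    ///   undefined,                   // key (none was written)
+    ///   false,                       // isStaticChildren: always false, per the note above
+    ///   { fileName: "index.jsx", lineNumber: 1, columnNumber: 1 }, // source
+    ///   this,                        // self
+    /// );
+    /// ```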
+    /// [0]: https://github.com/automerge/automerge/issues/177
     pub const JSXElement = struct {
+        /// null represents a fragment
         tag: ?ExprNodeIndex = null,
-        properties: []G.Property,
-        children: ExprNodeList,
+
+        /// props
+        properties: []G.Property = &([_]G.Property{}),
+
+        /// element children
+        children: ExprNodeList = &([_]ExprNodeIndex{}),
+
+        /// key is the key prop, like the "a" in <ListItem key="a" />
+        key: ?ExprNodeIndex = null,
+
+        flags: Flags.JSXElement = Flags.JSXElement{},
+
+        pub const SpecialProp = enum {
+            __self, // old react transform used this as a prop
+            __source,
+            key,
+            any,
+
+            pub const Map = std.ComptimeStringMap(SpecialProp, .{
+                .{ "__self", .__self },
+                .{ "__source", .__source },
+                .{ "key", .key },
+            });
+        };
     };

     pub const Missing = struct {
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 9108285aa2..be3befa61c 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -15,12 +15,15 @@ pub const T = tables.T;
 pub const CodePoint = tables.CodePoint;
 pub const Keywords = tables.Keywords;
 pub const tokenToString = tables.tokenToString;
-pub const jsxEntity = tables.jsxEntity;
 pub const StrictModeReservedWords = tables.StrictModeReservedWords;
 pub const PropertyModifierKeyword = tables.PropertyModifierKeyword;
 pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword;
 pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier;

+fn notimpl() noreturn {
+    std.debug.panic("not implemented yet!", .{});
+}
+
 pub var emptyJavaScriptString = ([_]u16{0});

 pub const JSONOptions = struct {
@@ -29,13 +32,14 @@
 };

 pub const Lexer = struct {
+    const LexerType = @This();

     // pub const Error = error{
     //     UnexpectedToken,
     //     EndOfFile,
     // };

-    // err: ?@This().Error,
+    // err: ?LexerType.Error,
     log: *logger.Log,
     json_options: ?JSONOptions = null,
     for_global_name: bool = false,
@@ -65,11 +69,11 @@
     prev_error_loc: logger.Loc = logger.Loc.Empty,
     allocator: *std.mem.Allocator,

-    pub fn loc(self: *@This()) logger.Loc {
+    pub fn loc(self: *LexerType) logger.Loc {
         return logger.usize2Loc(self.start);
     }

-    fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 {
+    fn nextCodepointSlice(it: *LexerType) callconv(.Inline) ?[]const u8 {
         if (it.current >= it.source.contents.len) {
             // without this line, strings cut off one before the last character
             it.end = it.current;
@@ -83,15 +87,15 @@
         return it.source.contents[it.current - cp_len ..
 it.current];
     }

-    pub fn syntaxError(self: *@This()) void {
+    pub fn syntaxError(self: *LexerType) void {
         self.addError(self.start, "Syntax Error!!", .{}, true);
     }

-    pub fn addDefaultError(self: *@This(), msg: []const u8) void {
+    pub fn addDefaultError(self: *LexerType, msg: []const u8) void {
         self.addError(self.start, "{s}", .{msg}, true);
     }

-    pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
+    pub fn addError(self: *LexerType, _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
         var __loc = logger.usize2Loc(_loc);
         if (__loc.eql(self.prev_error_loc)) {
             return;
@@ -103,7 +107,7 @@
         msg.formatNoWriter(std.debug.panic);
     }

-    pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void {
+    pub fn addRangeError(self: *LexerType, r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void {
         if (self.prev_error_loc.eql(r.loc)) {
             return;
         }
@@ -122,7 +126,7 @@
         }
     }

-    fn doPanic(self: *@This(), content: []const u8) void {
+    fn doPanic(self: *LexerType, content: []const u8) void {
         if (@import("builtin").is_test) {
             self.did_panic = true;
         } else {
@@ -130,11 +134,11 @@
         }
     }

-    pub fn codePointEql(self: *@This(), a: u8) bool {
+    pub fn codePointEql(self: *LexerType, a: u8) bool {
         return @intCast(CodePoint, a) == self.code_point;
     }

-    fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint {
+    fn nextCodepoint(it: *LexerType) callconv(.Inline) CodePoint {
         const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1);

         switch (slice.len) {
@@ -148,7 +152,7 @@
     /// Look ahead at the next n codepoints without advancing the iterator.
    /// If fewer than n codepoints are available, then return the remainder of the string.
-    fn peek(it: *@This(), n: usize) []const u8 {
+    fn peek(it: *LexerType, n: usize) []const u8 {
         const original_i = it.current;
         defer it.current = original_i;
@@ -162,11 +166,11 @@
         return it.source.contents[original_i..end_ix];
     }

-    pub fn isIdentifierOrKeyword(lexer: @This()) bool {
+    pub fn isIdentifierOrKeyword(lexer: LexerType) bool {
         return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier);
     }

-    fn parseStringLiteral(lexer: *@This()) void {
+    fn parseStringLiteral(lexer: *LexerType) void {
         var quote: CodePoint = lexer.code_point;
         var needs_slow_path = false;
         var suffixLen: usize = 1;
@@ -275,7 +279,7 @@
         // // }
     }

-    fn step(lexer: *@This()) void {
+    fn step(lexer: *LexerType) void {
         lexer.code_point = lexer.nextCodepoint();

         // Track the approximate number of newlines in the file so we can preallocate
@@ -289,7 +293,7 @@
         }
     }

-    pub fn expect(self: *@This(), comptime token: T) void {
+    pub fn expect(self: *LexerType, comptime token: T) void {
         if (self.token != token) {
             self.expected(token);
         }
@@ -297,7 +301,7 @@
         self.next();
     }

-    pub fn expectOrInsertSemicolon(lexer: *@This()) void {
+    pub fn expectOrInsertSemicolon(lexer: *LexerType) void {
         if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and
             lexer.token != T.t_close_brace and
             lexer.token != T.t_end_of_file))
@@ -305,16 +309,16 @@
         }
     }

-    pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void {
+    pub fn addUnsupportedSyntaxError(self: *LexerType, msg: []const u8) void {
         self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true);
     }

-    pub fn scanIdentifierWithEscapes(self: *@This()) void {
+    pub fn scanIdentifierWithEscapes(self: *LexerType) void {
         self.addUnsupportedSyntaxError("escape sequence");
         return;
     }

-    pub fn debugInfo(self: *@This()) void {
+    pub fn debugInfo(self: *LexerType) void {
         if (self.log.errors > 0) {
             const stderr = std.io.getStdErr().writer();
             self.log.print(stderr) catch unreachable;
@@ -327,14 +331,96 @@
         }
     }

-    pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void {
+    pub fn expectContextualKeyword(self: *LexerType, comptime keyword: string) void {
         if (!self.isContextualKeyword(keyword)) {
             self.addError(self.start, "\"{s}\"", .{keyword}, true);
         }
         self.next();
     }

-    pub fn next(lexer: *@This()) void {
+    pub fn maybeExpandEquals(lexer: *LexerType) void {
+        switch (lexer.code_point) {
+            '>' => {
+                // "=" + ">" = "=>"
+                lexer.token = .t_equals_greater_than;
+                lexer.step();
+            },
+            '=' => {
+                // "=" + "=" = "=="
+                lexer.token = .t_equals_equals;
+                lexer.step();
+
+                if (lexer.code_point == '=') {
+                    // "=" + "==" = "==="
+                    lexer.token = .t_equals_equals_equals;
+                    lexer.step();
+                }
+            },
+            else => {},
+        }
+    }
+
+    pub fn expectLessThan(lexer: *LexerType, is_inside_jsx_element: bool) !void {
+        switch (lexer.token) {
+            .t_less_than => {
+                if (is_inside_jsx_element) {
+                    try lexer.nextInsideJSXElement();
+                } else {
+                    lexer.next();
+                }
+            },
+            .t_less_than_equals => {
+                // "<=" with the leading "<" consumed leaves "="
+                lexer.token = .t_equals;
+                lexer.start += 1;
+                lexer.maybeExpandEquals();
+            },
+            .t_less_than_less_than => {
+                lexer.token = .t_less_than;
+                lexer.start += 1;
+            },
+            .t_less_than_less_than_equals => {
+                lexer.token = .t_less_than_equals;
+                lexer.start += 1;
+            },
+            else => {
+                lexer.expected(.t_less_than);
+            },
+        }
+    }
+
+    pub fn expectGreaterThan(lexer: *LexerType, is_inside_jsx_element: bool) !void {
+        switch (lexer.token) {
+            .t_greater_than => {
+                if
 (is_inside_jsx_element) {
+                    try lexer.nextInsideJSXElement();
+                } else {
+                    lexer.next();
+                }
+            },
+            .t_greater_than_equals => {
+                lexer.token = .t_equals;
+                lexer.start += 1;
+                lexer.maybeExpandEquals();
+            },
+            .t_greater_than_greater_than => {
+                lexer.token = .t_greater_than;
+                lexer.start += 1;
+            },
+            .t_greater_than_greater_than_equals => {
+                // ">>=" with the leading ">" consumed leaves ">="
+                lexer.token = .t_greater_than_equals;
+                lexer.start += 1;
+            },
+            .t_greater_than_greater_than_greater_than => {
+                // ">>>" with the leading ">" consumed leaves ">>"
+                lexer.token = .t_greater_than_greater_than;
+                lexer.start += 1;
+            },
+            else => {
+                lexer.expected(.t_greater_than);
+            },
+        }
+    }
+
+    pub fn next(lexer: *LexerType) void {
         lexer.has_newline_before = lexer.end == 0;

         lex: while (true) {
@@ -900,7 +986,7 @@
         }
     }

-    pub fn expected(self: *@This(), token: T) void {
+    pub fn expected(self: *LexerType, token: T) void {
         if (tokenToString.get(token).len > 0) {
             self.expectedString(tokenToString.get(token));
         } else {
@@ -908,7 +994,7 @@
     }

-    pub fn unexpected(lexer: *@This()) void {
+    pub fn unexpected(lexer: *LexerType) void {
         const found = finder: {
             if (lexer.start == lexer.source.contents.len) {
                 break :finder "end of file";
@@ -920,15 +1006,15 @@
         lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true);
     }

-    pub fn raw(self: *@This()) []const u8 {
+    pub fn raw(self: *LexerType) []const u8 {
         return self.source.contents[self.start..self.end];
     }

-    pub fn isContextualKeyword(self: *@This(), comptime keyword: string) bool {
+    pub fn isContextualKeyword(self: *LexerType, comptime keyword: string) bool {
         return self.token == .t_identifier and strings.eql(self.raw(), keyword);
     }

-    pub fn expectedString(self: *@This(), text: string) void {
+    pub fn expectedString(self: *LexerType, text: string) void {
         const found = finder: {
             if (self.source.contents.len != self.start) {
                 break :finder self.raw();
@@ -940,7 +1026,7 @@
         self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
     }

-    pub fn scanCommentText(lexer: *@This()) void {
+    pub fn scanCommentText(lexer: *LexerType) void {
         var text = lexer.source.contents[lexer.start..lexer.end];
         const has_preserve_annotation = text.len > 2 and text[2] == '!';
         const is_multiline_comment = text[1] == '*';
@@ -965,20 +1051,20 @@
     // TODO: implement this
     // it's too complicated to handle all the edgecases right now given the state of Zig's standard library
-    pub fn removeMultilineCommentIndent(lexer: *@This(), _prefix: string, text: string) string {
+    pub fn removeMultilineCommentIndent(lexer: *LexerType, _prefix: string, text: string) string {
         return text;
     }

-    pub fn range(self: *@This()) logger.Range {
+    pub fn range(self: *LexerType) logger.Range {
         return logger.Range{
             .loc = logger.usize2Loc(self.start),
             .len = std.math.lossyCast(i32, self.end - self.start),
         };
     }

-    pub fn initGlobalName(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+    pub fn initGlobalName(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !LexerType {
         var empty_string_literal: JavascriptString = emptyJavaScriptString;
-        var lex = @This(){
+        var lex = LexerType{
             .log = log,
             .source = source.*,
             .string_literal = empty_string_literal,
@@ -993,9 +1079,9 @@
         return lex;
     }

-    pub fn initTSConfig(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+    pub fn initTSConfig(log: *logger.Log, source: *logger.Source,
 allocator: *std.mem.Allocator) !LexerType {
         var empty_string_literal: JavascriptString = emptyJavaScriptString;
-        var lex = @This(){
+        var lex = LexerType{
             .log = log,
             .source = source.*,
             .string_literal = empty_string_literal,
@@ -1013,9 +1099,9 @@
         return lex;
     }

-    pub fn initJSON(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+    pub fn initJSON(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !LexerType {
         var empty_string_literal: JavascriptString = &emptyJavaScriptString;
-        var lex = @This(){
+        var lex = LexerType{
             .log = log,
             .source = source.*,
             .string_literal = empty_string_literal,
@@ -1033,9 +1119,9 @@
         return lex;
     }

-    pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+    pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !LexerType {
         var empty_string_literal: JavascriptString = &emptyJavaScriptString;
-        var lex = @This(){
+        var lex = LexerType{
             .log = log,
             .source = source.*,
             .string_literal = empty_string_literal,
@@ -1049,7 +1135,7 @@
         return lex;
     }

-    pub fn scanRegExp(lexer: *@This()) void {
+    pub fn scanRegExp(lexer: *LexerType) void {
         while (true) {
             switch (lexer.code_point) {
                 '/' => {
@@ -1080,33 +1166,414 @@
     }

     // TODO: use wtf-8 encoding.
-    pub fn stringToUTF16(lexer: *@This(), str: string) JavascriptString {
+    pub fn stringToUTF16(lexer: *LexerType, str: string) JavascriptString {
         var buf: JavascriptString = lexer.allocator.alloc(u16, std.mem.len(str)) catch unreachable;
-        var i: usize = 0;
         // there's probably a faster/better way
-        for (str) |char| {
+        for (str) |char, i| {
             buf[i] = char;
-            i += 1;
         }
         return buf;
     }

     // TODO: use wtf-8 encoding.
-    pub fn utf16ToStringWithValidation(lexer: *@This(), js: JavascriptString) !string {
+    pub fn utf16ToStringWithValidation(lexer: *LexerType, js: JavascriptString) !string {
         return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js);
     }

     // TODO: use wtf-8 encoding.
-    pub fn utf16ToString(lexer: *@This(), js: JavascriptString) string {
+    pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) string {
         return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable;
     }

-    pub fn nextInsideJSXElement() void {
-        std.debug.panic("JSX not implemented yet.", .{});
+    pub fn nextInsideJSXElement(lexer: *LexerType) !void {
+        lexer.has_newline_before = false;
+
+        while (true) {
+            lexer.start = lexer.end;
+            lexer.token = .t_end_of_file;
+
+            switch (lexer.code_point) {
+                -1 => {
+                    lexer.token = .t_end_of_file;
+                },
+                '\r', '\n', 0x2028, 0x2029 => {
+                    lexer.step();
+                    lexer.has_newline_before = true;
+                    continue;
+                },
+                '\t', ' ' => {
+                    lexer.step();
+                    continue;
+                },
+                '.' => {
+                    lexer.step();
+                    lexer.token = .t_dot;
+                },
+                '=' => {
+                    lexer.step();
+                    lexer.token = .t_equals;
+                },
+                '{' => {
+                    lexer.step();
+                    lexer.token = .t_open_brace;
+                },
+                '}' => {
+                    lexer.step();
+                    lexer.token = .t_close_brace;
+                },
+                '<' => {
+                    lexer.step();
+                    lexer.token = .t_less_than;
+                },
+                '>' => {
+                    lexer.step();
+                    lexer.token = .t_greater_than;
+                },
+                '/' => {
+                    // '/' or '//' or '/* ...
 */'
+
+                    lexer.step();
+                    switch (lexer.code_point) {
+                        '/' => {
+                            single_line_comment: while (true) {
+                                lexer.step();
+                                switch (lexer.code_point) {
+                                    '\r', '\n', 0x2028, 0x2029 => {
+                                        break :single_line_comment;
+                                    },
+                                    -1 => {
+                                        break :single_line_comment;
+                                    },
+                                    else => {},
+                                }
+                            }
+                            // Skip the comment and rescan from the top.
+                            continue;
+                        },
+                        '*' => {
+                            lexer.step();
+                            const start_range = lexer.range();
+                            multi_line_comment: while (true) {
+                                switch (lexer.code_point) {
+                                    '*' => {
+                                        lexer.step();
+                                        if (lexer.code_point == '/') {
+                                            lexer.step();
+                                            break :multi_line_comment;
+                                        }
+                                    },
+                                    '\r', '\n', 0x2028, 0x2029 => {
+                                        lexer.step();
+                                        lexer.has_newline_before = true;
+                                    },
+                                    -1 => {
+                                        lexer.start = lexer.end;
+                                        lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true);
+                                    },
+                                    else => {
+                                        lexer.step();
+                                    },
+                                }
+                            }
+                            continue;
+                        },
+                        else => {
+                            lexer.token = .t_slash;
+                        },
+                    }
+                },
+                '\'' => {
+                    lexer.step();
+                    try lexer.parseJSXStringLiteral('\'');
+                },
+                '"' => {
+                    lexer.step();
+                    try lexer.parseJSXStringLiteral('"');
+                },
+                else => {
+                    if (isWhitespace(lexer.code_point)) {
+                        lexer.step();
+                        continue;
+                    }
+
+                    if (isIdentifierStart(lexer.code_point)) {
+                        lexer.step();
+                        while (isIdentifierContinue(lexer.code_point) or lexer.code_point == '-') {
+                            lexer.step();
+                        }
+
+                        // Parse JSX namespaces. These are not supported by React or TypeScript
+                        // but someone using JSX syntax in more obscure ways may find a use for
+                        // them. A namespaced name is just always turned into a string so you
+                        // can't use this feature to reference JavaScript identifiers.
+                        if (lexer.code_point == ':') {
+                            lexer.step();
+
+                            if (isIdentifierStart(lexer.code_point)) {
+                                while (isIdentifierStart(lexer.code_point) or lexer.code_point == '-') {
+                                    lexer.step();
+                                }
+                            } else {
+                                lexer.addError(lexer.range().endI(), "Expected identifier after \"{s}\" in namespaced JSX name", .{lexer.raw()}, true);
+                            }
+                        }
+
+                        lexer.identifier = lexer.raw();
+                        lexer.token = .t_identifier;
+                        break;
+                    }
+
+                    lexer.end = lexer.current;
+                    lexer.token = .t_syntax_error;
+                },
+            }
+
+            return;
+        }
+    }
+
+    pub fn parseJSXStringLiteral(lexer: *LexerType, comptime quote: u8) !void {
+        var backslash = logger.Range.None;
+        var needs_decode = false;
+
+        string_literal: while (true) {
+            switch (lexer.code_point) {
+                -1 => {
+                    lexer.syntaxError();
+                },
+                '&' => {
+                    needs_decode = true;
+                    lexer.step();
+                },
+                '\\' => {
+                    backslash = logger.Range{ .loc = logger.Loc{
+                        .start = @intCast(i32, lexer.end),
+                    }, .len = 1 };
+                    lexer.step();
+                    continue;
+                },
+                quote => {
+                    if (backslash.len > 0) {
+                        backslash.len += 1;
+                        lexer.previous_backslash_quote_in_jsx = backslash;
+                    }
+                    lexer.step();
+                    // not sure about this!
+                    break :string_literal;
+                },
+                else => {
+                    // Non-ASCII strings need the slow path
+                    if (lexer.code_point >= 0x80) {
+                        needs_decode = true;
+                    }
+                    lexer.step();
+                },
+            }
+            backslash = logger.Range.None;
+        }
+
+        lexer.token = .t_string_literal;
+        const text = lexer.source.contents[lexer.start + 1 ..
 lexer.end - 1];
+
+        if (needs_decode) {
+            var out = std.ArrayList(u16).init(lexer.allocator);
+            // slow path
+            try lexer.decodeJSXEntities(text, &out);
+            lexer.string_literal = out.toOwnedSlice();
+        } else {
+            // fast path
+            lexer.string_literal = lexer.stringToUTF16(text);
+        }
+    }
+
+    pub fn expectJSXElementChild(lexer: *LexerType, token: T) !void {
+        if (lexer.token != token) {
+            lexer.expected(token);
+        }
+
+        try lexer.nextJSXElementChild();
+    }
+
+    pub fn nextJSXElementChild(lexer: *LexerType) !void {
+        lexer.has_newline_before = false;
+        const original_start = lexer.end;
+
+        while (true) {
+            lexer.start = lexer.end;
+            lexer.token = T.t_end_of_file;
+
+            switch (lexer.code_point) {
+                -1 => {
+                    lexer.token = .t_end_of_file;
+                },
+                '{' => {
+                    lexer.step();
+                    lexer.token = .t_open_brace;
+                },
+                '<' => {
+                    lexer.step();
+                    lexer.token = .t_less_than;
+                },
+                else => {
+                    var needs_fixing = false;
+
+                    string_literal: while (true) {
+                        switch (lexer.code_point) {
+                            -1 => {
+                                lexer.syntaxError();
+                            },
+                            '&', '\r', '\n', 0x2028, 0x2029 => {
+                                needs_fixing = true;
+                                lexer.step();
+                            },
+                            '{', '<' => {
+                                break :string_literal;
+                            },
+                            else => {
+                                // Non-ASCII strings need the slow path
+                                if (lexer.code_point >= 0x80) {
+                                    needs_fixing = true;
+                                }
+                                lexer.step();
+                            },
+                        }
+                    }
+
+                    lexer.token = .t_string_literal;
+                    const text = lexer.source.contents[original_start..lexer.end];
+
+                    if (needs_fixing) {
+                        // slow path
+                        lexer.string_literal = try fixWhitespaceAndDecodeJSXEntities(lexer, text);
+
+                        if (lexer.string_literal.len == 0) {
+                            lexer.has_newline_before = true;
+                            continue;
+                        }
+                    } else {
+                        lexer.string_literal = lexer.stringToUTF16(text);
+                    }
+                },
+            }
+
+            break;
+        }
+    }
+
+    pub fn fixWhitespaceAndDecodeJSXEntities(lexer: *LexerType, text: string) !JavascriptString {
+        var decoded = std.ArrayList(u16).init(lexer.allocator);
+        var decoded_ptr = &decoded;
+        var i: usize = 0;
+        var after_last_non_whitespace: ?usize = null;
+
+        // Trim whitespace off the end of the first line
+        var first_non_whitespace: ?usize = null;
+
+        while (i < text.len) {
+            const width = try std.unicode.utf8ByteSequenceLength(text[i]);
+            const i_0 = i;
+            i += width;
+            // Decode the code point rather than reading raw bytes so that
+            // multi-byte characters like U+2028 compare correctly.
+            const c = try std.unicode.utf8Decode(text[i_0..i]);
+
+            switch (c) {
+                '\r', '\n', 0x2028, 0x2029 => {
+                    if (first_non_whitespace != null and after_last_non_whitespace != null) {
+                        // Newline
+                        if (decoded.items.len > 0) {
+                            try decoded.append(' ');
+                        }
+
+                        // Trim whitespace off the start and end of lines in the middle
+                        try lexer.decodeJSXEntities(text[first_non_whitespace.?..after_last_non_whitespace.?], &decoded);
+                    }
+
+                    // Reset for the next line
+                    first_non_whitespace = null;
+                },
+                '\t', ' ' => {},
+                else => {
+                    // Check for unusual whitespace characters
+                    if (!isWhitespace(@intCast(CodePoint, c))) {
+                        after_last_non_whitespace = i_0 + width;
+                        if (first_non_whitespace == null) {
+                            first_non_whitespace = i_0;
+                        }
+                    }
+                },
+            }
+        }
+
+        if (first_non_whitespace) |start| {
+            if (decoded.items.len > 0) {
+                try decoded.append(' ');
+            }
+
+            try decodeJSXEntities(lexer, text[start..text.len], decoded_ptr);
+        }
+
+        return decoded.toOwnedSlice();
+    }
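+
+    // An illustration of the whitespace rules implemented above (assumed
+    // behavior, matching how JSX text is treated by React's transform):
+    //
+    //   <div>
+    //     Hello,
+    //     world!
+    //   </div>
+    //
+    // produces the single child string "Hello, world!": leading and trailing
+    // whitespace on each line is trimmed, and interior newlines collapse to a
+    // single space. Entity decoding (below) maps "&amp;" to '&', "&#65;" to
+    // 'A', and "&#x1F600;" to the UTF-16 surrogate pair 0xD83D 0xDE00.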
+
+    pub fn decodeJSXEntities(lexer: *LexerType, text: string, out: *std.ArrayList(u16)) !void {
+        var i: usize = 0;
+
+        while (i < text.len) {
+            // Decode one code point at a time so that "&" entities and
+            // non-ASCII characters are handled correctly.
+            const width = try std.unicode.utf8ByteSequenceLength(text[i]);
+            const i_0 = i;
+            i += width;
+            var c = @intCast(i32, try std.unicode.utf8Decode(text[i_0..i]));
+
+            if (c == '&') {
+                if (strings.indexOfChar(text[i..text.len], ';')) |length| {
+                    const entity = text[i .. i + length];
+                    if (entity[0] == '#') {
+                        var number = entity[1..entity.len];
+                        var base: u8 = 10;
+                        if (number.len > 1 and number[0] == 'x') {
+                            number = number[1..number.len];
+                            base = 16;
+                        }
+                        c = try std.fmt.parseInt(i32, number, base);
+                        i += length + 1;
+                    } else if (tables.jsxEntity.get(entity)) |ent| {
+                        c = ent;
+                        i += length + 1;
+                    }
+                }
+            }
+
+            if (c <= 0xFFFF) {
+                try out.append(@intCast(u16, c));
+            } else {
+                // Encode code points above the BMP as a UTF-16 surrogate pair.
+                c -= 0x10000;
+                try out.ensureUnusedCapacity(2);
+                out.appendAssumeCapacity(@intCast(u16, 0xD800 + ((c >> 10) & 0x3FF)));
+                out.appendAssumeCapacity(@intCast(u16, 0xDC00 + (c & 0x3FF)));
+            }
+        }
+    }
+
+    pub fn expectInsideJSXElement(lexer: *LexerType, token: T) !void {
+        if (lexer.token != token) {
+            lexer.expected(token);
+        }
+
+        try lexer.nextInsideJSXElement();
+    }
+
-    fn scanRegExpValidateAndStep(lexer: *@This()) void {
+    fn scanRegExpValidateAndStep(lexer: *LexerType) void {
         if (lexer.code_point == '\\') {
             lexer.step();
         }
@@ -1125,7 +1592,7 @@
         }
     }

-    pub fn rescanCloseBraceAsTemplateToken(lexer: *@This()) void {
+    pub fn rescanCloseBraceAsTemplateToken(lexer: *LexerType) void {
         if (lexer.token != .t_close_brace) {
             lexer.expected(.t_close_brace);
         }
@@ -1138,7 +1605,7 @@
         lexer.rescan_close_brace_as_template_token = false;
     }

-    pub fn rawTemplateContents(lexer: *@This()) string {
+    pub fn rawTemplateContents(lexer: *LexerType) string {
         var text: string = undefined;

         switch (lexer.token) {
@@ -1188,7 +1655,7 @@
         return bytes.toOwnedSliceLength(end + 1);
     }

-    fn parseNumericLiteralOrDot(lexer: *@This()) void {
+    fn parseNumericLiteralOrDot(lexer: *LexerType) void {
         // Number or dot;
         var first = lexer.code_point;
         lexer.step();
@@ -1658,7 +2125,7 @@ fn test_lexer(contents: []const u8) Lexer {
     return Lexer.init(log, &source, alloc.dynamic) catch unreachable;
 }

-// test "@This().next()" {
+// test "LexerType.next()" {
 //     try alloc.setup(std.heap.page_allocator);
 //     const msgs = std.ArrayList(logger.Msg).init(alloc.dynamic);
 //     const log = logger.Log{
@@ -1666,7 +2133,7 @@ fn test_lexer(contents: []const u8) Lexer {
 //     };
 //     const source = logger.Source.initPathString("index.js", "for (let i = 0; i < 100; i++) { console.log('hi'); }", std.heap.page_allocator);

-//     var lex = try @This().init(log, source, alloc.dynamic);
+//     var lex = try LexerType.init(log, source, alloc.dynamic);
 //     lex.next();
 // }

@@ -1732,7 +2199,7 @@ pub fn test_stringLiteralEquals(expected: string, source_text: string) void {
     std.testing.expectEqualStrings(expected, lit);
 }

-pub fn test_skipTo(lexer: *@This(), n: string) void {
+pub fn test_skipTo(lexer: *LexerType, n: string) void {
     var i: usize = 0;
     while (i < n.len) {
         lexer.next();
@@ -1740,7 +2207,7 @@
     }
 }

-test "@This().rawTemplateContents" {
+test "LexerType.rawTemplateContents" {
     test_stringLiteralEquals("hello!", "const a = 'hello!';");
     test_stringLiteralEquals("hello!hi", "const b = 'hello!hi';");
     test_stringLiteralEquals("hello!\n\nhi", "const b = `hello!\n\nhi`;");
diff --git a/src/js_parser/js_parser.zig b/src/js_parser/js_parser.zig
index facfd1be09..7c8b25019a 100644
--- a/src/js_parser/js_parser.zig
+++ b/src/js_parser/js_parser.zig
@@ -1252,6 +1252,7 @@ pub const Parser =
 struct {
     preserve_unused_imports_ts: bool = false,
     use_define_for_class_fields: bool = false,
     suppress_warnings_about_weird_code: bool = true,
+    moduleType: ModuleType = ModuleType.esm,
     trim_unused_imports: bool = true,
 };
@@ -1296,6 +1297,24 @@ pub const Parser = struct {
         var after = List(js_ast.Part).init(p.allocator);
         var parts = List(js_ast.Part).init(p.allocator);
         try p.appendPart(&parts, stmts);
+
+        // Auto-import JSX
+        if (p.options.jsx.parse) {
+            const jsx_symbol: Symbol = p.symbols.items[p.jsx_runtime_ref.inner_index];
+            const jsx_fragment_symbol: Symbol = p.symbols.items[p.jsx_fragment_ref.inner_index];
+            const jsx_factory_symbol: Symbol = p.symbols.items[p.jsx_factory_ref.inner_index];
+
+            if (jsx_symbol.use_count_estimate > 0 or jsx_fragment_symbol.use_count_estimate > 0 or jsx_factory_symbol.use_count_estimate > 0) {
+                var jsx_imports = [_]string{ p.options.jsx.jsx, p.options.jsx.fragment, p.options.jsx.factory };
+                var symbols = StringRefMap.init(p.allocator);
+                defer symbols.deinit();
+                try symbols.put(p.options.jsx.jsx, p.jsx_runtime_ref);
+                try symbols.put(p.options.jsx.fragment, p.jsx_fragment_ref);
+                try symbols.put(p.options.jsx.factory, p.jsx_factory_ref);
+                try p.generateImportStmt(p.options.jsx.import_source, &jsx_imports, &parts, symbols);
+            }
+        }
+
         // for (stmts) |stmt| {
         //     var _stmts = ([_]Stmt{stmt});
@@ -1385,6 +1404,7 @@ pub const Parser = struct {
                     .parse = transform.loader == .tsx or transform.loader == .jsx,
                     .factory = transform.jsx_factory,
                     .fragment = transform.jsx_fragment,
+                    .import_source = transform.jsx_import_source,
                 },
             },
             .allocator = allocator,
@@ -1441,6 +1461,39 @@
 var e_missing_data = E.Missing{};
 var s_missing = S.Empty{};
 var nullExprData = Expr.Data{ .e_missing = &e_missing_data };
 var nullStmtData = Stmt.Data{ .s_empty = &s_missing };
+pub const Prefill = struct {
+    pub const StringLiteral = struct {
+        pub var Key = [3]u16{ 'k', 'e', 'y' };
+        pub var Children = [_]u16{ 'c', 'h', 'i', 'l', 'd', 'r', 'e', 'n' };
+        // React reads __source.fileName (camelCase), per the doc comment on
+        // E.JSXElement, so this literal is camelCase like the two below it.
+        pub var Filename = [_]u16{ 'f', 'i', 'l', 'e', 'N', 'a', 'm', 'e' };
+        pub var LineNumber = [_]u16{ 'l', 'i', 'n', 'e', 'N', 'u', 'm', 'b', 'e', 'r' };
+        pub var ColumnNumber = [_]u16{ 'c', 'o', 'l', 'u', 'm', 'n', 'N', 'u', 'm', 'b', 'e', 'r' };
+    };
+    pub const String = struct {
+        pub var Filename = E.String{ .value = &Prefill.StringLiteral.Filename };
+        pub var LineNumber = E.String{ .value = &Prefill.StringLiteral.LineNumber };
+        pub var ColumnNumber = E.String{ .value = &Prefill.StringLiteral.ColumnNumber };
+    };
+    pub const Data = struct {
+        pub var Filename = Expr.Data{ .e_string = &Prefill.String.Filename };
+        pub var LineNumber = Expr.Data{ .e_string = &Prefill.String.LineNumber };
+        pub var ColumnNumber = Expr.Data{ .e_string = &Prefill.String.ColumnNumber };
+    };
+    pub const Runtime = struct {
+        pub var JSXFilename = "JSX_fIlEnAmE";
+        pub var JSXDevelopmentImportName = "jsxDEV";
+        pub var JSXImportName = "jsx";
+    };
+};
+
+var keyString = E.String{ .value = &Prefill.StringLiteral.Key };
+var keyExprData = Expr.Data{ .e_string = &keyString };
+var jsxChildrenKeyString = E.String{ .value = &Prefill.StringLiteral.Children };
+var jsxChildrenKeyData = Expr.Data{ .e_string = &jsxChildrenKeyString };
+var nullExprValueData = E.Null{};
+var falseExprValueData = E.Boolean{ .value = false };
+var nullValueExpr = Expr.Data{ .e_null = &nullExprValueData };
+var falseValueExpr = Expr.Data{ .e_boolean = &falseExprValueData };

 // P is for Parser!
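// An illustration of the "Auto-import JSX" behavior above (assuming the
// default symbol names and an import_source of "react/jsx-dev-runtime";
// both are configurable): a file that uses JSX gets a synthesized
//
//   import { jsxDEV, Fragment, createElement } from "react/jsx-dev-runtime";
//
// built by generateImportStmt() below, one ClauseItem per referenced symbol,
// and appended as its own Part. ES module imports are hoisted, so the
// position of the synthesized statement doesn't matter.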
 // public only because of Binding.ToExpr
@@ -1474,6 +1527,7 @@
     import_meta_ref: js_ast.Ref = js_ast.Ref.None,
     promise_ref: ?js_ast.Ref = null,

+    has_classic_runtime_warned: bool = false,
     data: js_ast.AstData,
     injected_define_symbols: List(Ref),
@@ -1514,6 +1568,11 @@
     // "visit" pass.
     enclosing_namespace_arg_ref: ?js_ast.Ref = null,

+    jsx_filename_ref: js_ast.Ref = Ref.None,
+    jsx_runtime_ref: js_ast.Ref = Ref.None,
+    jsx_factory_ref: js_ast.Ref = Ref.None,
+    jsx_fragment_ref: js_ast.Ref = Ref.None,
+
     // Imports (both ES6 and CommonJS) are tracked at the top level
     import_records: List(ImportRecord),
     import_records_for_current_part: List(u32),
@@ -2034,11 +2093,65 @@
         return p.e(ident, loc);
     }

+    pub fn generateImportStmt(p: *P, import_path: string, imports: []string, parts: *List(js_ast.Part), symbols: StringRefMap) !void {
+        const import_record_i = p.addImportRecord(.stmt, logger.Loc.Empty, import_path);
+        var import_record = p.import_records.items[import_record_i];
+        var import_path_identifier = try import_record.path.name.nonUniqueNameString(p.allocator);
+        var namespace_identifier = try p.allocator.alloc(u8, import_path_identifier.len + "import_".len);
+        var clause_items = try p.allocator.alloc(js_ast.ClauseItem, imports.len);
+        var stmts = try p.allocator.alloc(Stmt, 1);
+        var declared_symbols = try p.allocator.alloc(js_ast.DeclaredSymbol, imports.len);
+        std.mem.copy(u8, namespace_identifier[0.."import_".len], "import_");
+        std.mem.copy(
+            u8,
+            namespace_identifier["import_".len..],
+            import_path_identifier,
+        );
+
+        const namespace_ref = try p.newSymbol(.other, namespace_identifier);
+        try p.module_scope.generated.append(namespace_ref);
+
+        for (imports) |alias, i| {
+            const ref = symbols.get(alias) orelse unreachable;
+            clause_items[i] = js_ast.ClauseItem{ .alias = imports[i], .original_name = imports[i], .alias_loc = logger.Loc{}, .name = LocRef{ .ref = ref, .loc = logger.Loc{} } };
+            declared_symbols[i] = js_ast.DeclaredSymbol{ .ref = ref, .is_top_level = true };
+            try p.is_import_item.put(ref, true);
+            try p.named_imports.put(ref, js_ast.NamedImport{
+                .alias = alias,
+                .alias_loc = logger.Loc{},
+                .namespace_ref = namespace_ref,
+                .import_record_index = import_record_i,
+            });
+        }
+
+        stmts[0] = p.s(S.Import{
+            .namespace_ref = namespace_ref,
+            .items = clause_items,
+            .import_record_index = import_record_i,
+        }, logger.Loc{});
+
+        var import_records = try p.allocator.alloc(@TypeOf(import_record_i), 1);
+        import_records[0] = import_record_i;
+
+        // Append a single import to the end of the file (ES6 imports are hoisted
+        // so we don't need to worry about where the import statement goes)
+        parts.append(js_ast.Part{ .stmts = stmts, .declared_symbols = declared_symbols, .import_record_indices = import_records }) catch unreachable;
+    }
+
     pub fn prepareForVisitPass(p: *P) !void {
         try p.pushScopeForVisitPass(js_ast.Scope.Kind.entry, locModuleScope);
         p.fn_or_arrow_data_visit.is_outside_fn_or_arrow = true;
         p.module_scope = p.current_scope;
         p.has_es_module_syntax = p.es6_import_keyword.len > 0 or p.es6_export_keyword.len > 0 or p.top_level_await_keyword.len > 0;

+        if (p.options.jsx.parse) {
+            if (p.options.jsx.development) {
+                p.jsx_filename_ref = p.newSymbol(.other, Prefill.Runtime.JSXFilename) catch unreachable;
+            }
+            const jsx_importname = p.options.jsx.jsx;
+            p.jsx_fragment_ref = p.newSymbol(.other, p.options.jsx.fragment) catch unreachable;
+            p.jsx_runtime_ref = p.newSymbol(.other,
 jsx_importname) catch unreachable;
+            p.jsx_factory_ref = p.newSymbol(.other, p.options.jsx.factory) catch unreachable;
+        }

         // ECMAScript modules are always interpreted as strict mode. This has to be
         // done before "hoistSymbols" because strict mode can alter hoisting (!).
@@ -2879,28 +2992,28 @@
             };
             var stmt = p.parseStmt(&_opts) catch unreachable;

-            var default_name: js_ast.LocRef = undefined;
+            const default_name: js_ast.LocRef = default_name_getter: {
+                switch (stmt.data) {
+                    // This was just a type annotation
+                    .s_type_script => {
+                        return stmt;
+                    },

-            switch (stmt.data) {
-                // This was just a type annotation
-                .s_type_script => {
-                    return stmt;
-                },
+                    .s_function => |func_container| {
+                        if (func_container.func.name) |name| {
+                            break :default_name_getter LocRef{ .loc = defaultLoc, .ref = name.ref };
+                        } else {}
+                    },
+                    .s_class => |class| {
+                        if (class.class.class_name) |name| {
+                            break :default_name_getter LocRef{ .loc = defaultLoc, .ref = name.ref };
+                        } else {}
+                    },
+                    else => {},
+                }

-                .s_function => |func_container| {
-                    if (func_container.func.name) |name| {
-                        default_name = LocRef{ .loc = defaultLoc, .ref = name.ref };
-                    } else {}
-                },
-                .s_class => |class| {
-                    if (class.class.class_name) |name| {
-                        default_name = LocRef{ .loc = defaultLoc, .ref = name.ref };
-                    } else {}
-                },
-                else => {
-                    p.panic("Internal error: unexpected stmt {s}", .{stmt});
-                },
-            }
+                break :default_name_getter createDefaultName(p, defaultLoc) catch unreachable;
+            };

             return p.s(
                 S.ExportDefault{ .default_name = default_name, .value = js_ast.StmtOrExpr{ .stmt = stmt } },
@@ -2923,27 +3036,28 @@
             const stmt: Stmt = p.parseClassStmt(loc, &stmtOpts);

             // Use the statement name if present, since it's a better name
-            var default_name: LocRef = undefined;
-            switch (stmt.data) {
-                .s_class => |class| {
-                    var ref: Ref = undefined;
-                    var picked = false;
-                    if (class.class.class_name) |loc_ref| {
-                        if (loc_ref.ref) |_ref| {
-                            ref = _ref;
-                            picked = true;
-                        }
-                    }
+            const default_name: js_ast.LocRef = default_name_getter: {
+                switch (stmt.data) {
+                    // This was just a type annotation
+                    .s_type_script => {
+                        return stmt;
+                    },

-                    if (!picked) {
-                        ref = (createDefaultName(p, defaultLoc) catch unreachable).ref orelse unreachable;
-                    }
-                    default_name = LocRef{ .loc = defaultLoc, .ref = ref };
-                },
-                else => {
-                    default_name = createDefaultName(p, defaultLoc) catch unreachable;
-                },
-            }
+                    .s_function => |func_container| {
+                        if (func_container.func.name) |_name| {
+                            break :default_name_getter LocRef{ .loc = defaultLoc, .ref = _name.ref };
+                        } else {}
+                    },
+                    .s_class => |class| {
+                        if (class.class.class_name) |_name| {
+                            break :default_name_getter LocRef{ .loc = defaultLoc, .ref = _name.ref };
+                        } else {}
+                    },
+                    else => {},
+                }
+
+                break :default_name_getter createDefaultName(p, defaultLoc) catch unreachable;
+            };

             return p.s(S.ExportDefault{ .default_name = default_name, .value = js_ast.StmtOrExpr{ .stmt = stmt } }, loc);
         },
@@ -3876,7 +3990,7 @@
                     }
                 }
             }
-            std.debug.print("\n\nmVALUE {s}:{s}\n", .{ expr, name });
+            // std.debug.print("\n\nmVALUE {s}:{s}\n", .{ expr, name });
             p.lexer.expectOrInsertSemicolon();

             return p.s(S.SExpr{ .value = expr }, loc);
         },
@@ -4030,7 +4144,7 @@
         var let_range = p.lexer.range();
         var raw = p.lexer.raw();
         if (p.lexer.token != .t_identifier or !strings.eql(raw, "let")) {
-            std.debug.print("HI", .{});
+            // std.debug.print("HI", .{});
            return ExprOrLetStmt{ .stmt_or_expr = js_ast.StmtOrExpr{ .expr = p.parseExpr(.lowest) } };
 }
@@ -4888,8 +5002,12 @@
     }

     pub fn loadNameFromRef(p: *P, ref: js_ast.Ref) string {
-        assert(ref.inner_index < p.allocated_names.items.len);
-        return p.allocated_names.items[ref.inner_index];
+        if (ref.source_index == std.math.maxInt(Ref.Int)) {
+            assert(ref.inner_index < p.allocated_names.items.len);
+            return p.allocated_names.items[ref.inner_index];
+        } else {
+            return p.symbols.items[ref.inner_index].original_name;
+        }
     }

     // This parses an expression. This assumes we've already parsed the "async"
@@ -5682,6 +5800,7 @@
         // Allow "in" inside call arguments
         const old_allow_in = p.allow_in;
         p.allow_in = true;
+        defer p.allow_in = old_allow_in;

         var args = List(Expr).init(p.allocator);
         p.lexer.expect(.t_open_paren);
@@ -5705,7 +5824,6 @@
         }

         p.lexer.expect(.t_close_paren);
-        p.allow_in = old_allow_in;
         return args.toOwnedSlice();
     }

@@ -5750,7 +5868,6 @@
             // treat "c.d" as OptionalChainContinue in "a?.b + c.d".
             var old_optional_chain = optional_chain;
             optional_chain = null;
-            std.debug.print("\nTOKEN {s}", .{p.lexer.token});

             switch (p.lexer.token) {
                 .t_dot => {
                     p.lexer.next();
@@ -6484,7 +6601,7 @@
     pub fn _parsePrefix(p: *P, level: Level, errors: *DeferredErrors, flags: Expr.EFlags) Expr {
         const loc = p.lexer.loc();
         const l = @enumToInt(level);
-        std.debug.print("Parse Prefix {s}:{s} @{s} ", .{ p.lexer.token, p.lexer.raw(), @tagName(level) });
+        // std.debug.print("Parse Prefix {s}:{s} @{s} ", .{ p.lexer.token, p.lexer.raw(), @tagName(level) });

         switch (p.lexer.token) {
             .t_super => {
@@ -6637,7 +6754,7 @@
                 _ = p.pushScopeForParsePass(.function_args, loc) catch unreachable;
                 defer p.popScope();

-                std.debug.print("HANDLE START ", .{});
+                // std.debug.print("HANDLE START ", .{});
                 return p.e(p.parseArrowBody(args, p.m(FnOrArrowDataParse{})) catch unreachable, loc);
             }
@@ -7014,11 +7131,33 @@
                 }

                 if (p.options.jsx.parse) {
-                    notimpl();
+                    // Use nextInsideJSXElement() instead of next() so we parse "<<" as "<"
+                    p.lexer.nextInsideJSXElement() catch unreachable;
+                    const element = p.parseJSXElement(loc) catch unreachable;
+
+                    // The call to parseJSXElement() above doesn't consume the last
+                    // t_greater_than because the caller knows which next() function to
+                    // call. Use next() instead of nextInsideJSXElement() here since the
+                    // next token is an expression.
+                    p.lexer.next();
+                    return element;
                 }

                 if (p.options.ts) {
-                    notimpl();
+                    // This is either an old-style type cast or a generic lambda function
+
+                    // "<T>(x)"
+                    // "<T>(x) => {}"
+                    if (p.trySkipTypeScriptTypeParametersThenOpenParenWithBacktracking()) {
+                        p.lexer.expect(.t_open_paren);
+                        return p.parseParenExpr(loc, level, ParenExprOpts{}) catch unreachable;
+                    }
+
+                    // "<T>x"
+                    p.lexer.next();
+                    p.skipTypescriptType(.lowest);
+                    p.lexer.expectGreaterThan(false) catch unreachable;
+                    return p.parsePrefix(level, errors, flags);
                 }

                 p.lexer.unexpected();
@@ -7037,8 +7176,13 @@
             return p.e(E.Missing{}, logger.Loc.Empty);
         }
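+    // For illustration (assumed TypeScript/JSX semantics; these forms are why
+    // a leading "<" at an expression position is ambiguous above):
+    //   const a = <div>hi</div>;    // JSX element (jsx/tsx loaders)
+    //   const b = <T,>(x: T) => x;  // generic arrow function (backtracking path)
+    //   const c = <number>value;    // old-style type cast (ts only, not tsx)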
-    pub fn jsxStringsToMemberExpression(p: *P, loc: logger.Loc, fragment: string) Expr {
-        notimpl();
+    // esbuild's version of this function is much more complicated.
+    // I'm not sure why defines are strictly relevant for this case,
+    // and I imagine all the allocations cause some performance overhead;
+    // guessing it's concurrency-related.
+    pub fn jsxStringsToMemberExpression(p: *P, loc: logger.Loc, ref: Ref) Expr {
+        p.recordUsage(&ref);
+        return p.e(E.Identifier{ .ref = ref }, loc);
     }

     // Note: The caller has already parsed the "import" keyword
@@ -7077,11 +7221,290 @@
         return p.e(E.Import{ .expr = value, .leading_interior_comments = comments, .import_record_index = 0 }, loc);
     }

-    pub fn parseJSXElement(loc: logger.Loc) Expr {
-        // Parse the tag
-        //var startRange, startText, startTag := p.parseJSXTag();
-        notimpl();
-        return p.e(E.Missing{}, logger.Loc.Empty);
+    const JSXTag = struct {
+        pub const TagType = enum { fragment, tag };
+        pub const Data = union(TagType) {
+            fragment: u1,
+            tag: Expr,
+
+            pub fn asExpr(d: *const Data) ?ExprNodeIndex {
+                switch (d.*) {
+                    .tag => |tag| {
+                        return tag;
+                    },
+                    else => {
+                        return null;
+                    },
+                }
+            }
+        };
+        data: Data,
+        range: logger.Range,
+        name: string = "",
+
+        pub fn parse(p: *P) !JSXTag {
+            const loc = p.lexer.loc();
+
+            // A missing tag is a fragment
+            if (p.lexer.token == .t_greater_than) {
+                return JSXTag{
+                    .range = logger.Range{ .loc = loc, .len = 0 },
+                    .data = Data{ .fragment = 1 },
+                };
+            }
+
+            // The tag is an identifier
+            var name = p.lexer.identifier;
+            var tag_range = p.lexer.range();
+            try p.lexer.expectInsideJSXElement(.t_identifier);
+
+            // Certain identifiers are strings
+            // (lowercase names like <div /> and dashed names like <my-tag />)
+            if (strings.contains(name, "-") or (name[0] >= 'a' and name[0] <= 'z')) {
+                return JSXTag{
+                    .data = Data{ .tag = p.e(E.String{ .value = try strings.toUTF16Alloc(name, p.allocator) }, loc) },
+                    .range = tag_range,
+                };
+            }
+
+            // Otherwise, this is an identifier
+            //
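+            // For illustration (assumed JSX semantics, mirroring esbuild):
+            //   <div />    parses as the string "div": an intrinsic element
+            //   <Button /> parses as a reference to the identifier Button
+            //   <>...</>   parses as a fragment, with no tag expression at all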