// Source: bun.sh/src/js_lexer.zig
// Snapshot: 2021-10-23 22:56:35 -07:00 — 2885 lines, 118 KiB, Zig
const std = @import("std");
const logger = @import("logger.zig");
const tables = @import("js_lexer_tables.zig");
const alloc = @import("alloc.zig");
const build_options = @import("build_options");
const js_ast = @import("js_ast.zig");
usingnamespace @import("ast/base.zig");
usingnamespace @import("global.zig");
const unicode = std.unicode;
const Source = logger.Source;
pub const T = tables.T;
pub const Keywords = tables.Keywords;
pub const tokenToString = tables.tokenToString;
pub const StrictModeReservedWords = tables.StrictModeReservedWords;
pub const PropertyModifierKeyword = tables.PropertyModifierKeyword;
pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword;
pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier;
pub const ChildlessJSXTags = tables.ChildlessJSXTags;
/// Placeholder for lexer paths that have not been written yet.
/// Panics with a fixed message and never returns.
fn notimpl() noreturn {
    Global.panic("not implemented yet!", .{});
}
/// Shared "empty" JavaScript string value.
/// NOTE(review): this array contains a single 0 code unit rather than being
/// zero-length — confirm consumers expect the NUL sentinel.
pub var emptyJavaScriptString = ([_]u16{0});

/// Comptime configuration for `NewLexer` when lexing JSON and JSON-ish
/// dialects (tsconfig.json, .env values, JSON-in-JSON).
pub const JSONOptions = struct {
    /// Enable JSON-specific warnings/errors
    is_json: bool = false,

    /// tsconfig.json supports comments & trailing comments
    allow_comments: bool = false,
    allow_trailing_commas: bool = false,

    /// Loading JSON-in-JSON may start like \\""\\"
    /// This is technically invalid, since we parse from the first value of the string
    ignore_leading_escape_sequences: bool = false,
    ignore_trailing_escape_sequences: bool = false,
};
pub fn NewLexer(comptime json_options: JSONOptions) type {
return struct {
const LexerType = @This();
const is_json = json_options.is_json;
const json = json_options;

/// Error set surfaced by lexer entry points.
pub const Error = error{
    UTF8Fail,
    OutOfMemory,
    SyntaxError,
    UnexpectedSyntax,
    JSONStringsMustUseDoubleQuotes,
    ParserError,
};

// pub const Error = error{
//     UnexpectedToken,
//     EndOfFile,
// };

// err: ?LexerType.Error,
log: *logger.Log,
for_global_name: bool = false,
source: *const logger.Source,

// Byte offsets into `source.contents`:
// `start`/`end` delimit the current token, `current` is the scan cursor
// (one codepoint ahead of `code_point`).
current: usize = 0,
start: usize = 0,
end: usize = 0,

did_panic: bool = false,
// Incremented on '\n' only (see `step`); used to preallocate the printer's
// line offset table, so being approximate is fine.
approximate_newline_count: usize = 0,
previous_backslash_quote_in_jsx: logger.Range = logger.Range.None,
token: T = T.t_end_of_file,
has_newline_before: bool = false,
has_pure_comment_before: bool = false,
preserve_all_comments_before: bool = false,
is_legacy_octal_literal: bool = false,
// When true, addError/addRangeError become no-ops (speculative scans).
is_log_disabled: bool = false,
comments_to_preserve_before: std.ArrayList(js_ast.G.Comment),
all_original_comments: ?[]js_ast.G.Comment = null,
// Current decoded codepoint; -1 means end of file.
code_point: CodePoint = -1,

identifier: []const u8 = "",
jsx_factory_pragma_comment: ?js_ast.Span = null,
jsx_fragment_pragma_comment: ?js_ast.Span = null,
source_mapping_url: ?js_ast.Span = null,
// Value of the most recent numeric literal token.
number: f64 = 0.0,
rescan_close_brace_as_template_token: bool = false,
// Deduplicates errors reported at the same location.
prev_error_loc: logger.Loc = logger.Loc.Empty,
regex_flags_start: ?u16 = null,
allocator: *std.mem.Allocator,

/// In JavaScript, strings are stored as UTF-16, but nearly every string is ascii.
/// This means, usually, we can skip UTF8 -> UTF16 conversions.
string_literal_buffer: std.ArrayList(u16),
string_literal_slice: string = "",
string_literal: JavascriptString,
string_literal_is_ascii: bool = false,

/// True while every string literal seen so far was pure ASCII.
is_ascii_only: bool = true,
/// Shallow copy of the full lexer state, used to snapshot/backtrack.
/// NOTE: reference-typed fields (log, source, allocator, array lists) are
/// shared with the original, not duplicated. Keep this field list in sync
/// with the struct's fields when adding state.
pub fn clone(self: *const LexerType) LexerType {
    return LexerType{
        .log = self.log,
        .for_global_name = self.for_global_name,
        .source = self.source,
        .current = self.current,
        .start = self.start,
        .end = self.end,
        .did_panic = self.did_panic,
        .approximate_newline_count = self.approximate_newline_count,
        .previous_backslash_quote_in_jsx = self.previous_backslash_quote_in_jsx,
        .token = self.token,
        .has_newline_before = self.has_newline_before,
        .has_pure_comment_before = self.has_pure_comment_before,
        .preserve_all_comments_before = self.preserve_all_comments_before,
        .is_legacy_octal_literal = self.is_legacy_octal_literal,
        .is_log_disabled = self.is_log_disabled,
        .comments_to_preserve_before = self.comments_to_preserve_before,
        .all_original_comments = self.all_original_comments,
        .code_point = self.code_point,
        .identifier = self.identifier,
        .regex_flags_start = self.regex_flags_start,
        .jsx_factory_pragma_comment = self.jsx_factory_pragma_comment,
        .jsx_fragment_pragma_comment = self.jsx_fragment_pragma_comment,
        .source_mapping_url = self.source_mapping_url,
        .number = self.number,
        .rescan_close_brace_as_template_token = self.rescan_close_brace_as_template_token,
        .prev_error_loc = self.prev_error_loc,
        .allocator = self.allocator,
        .string_literal_buffer = self.string_literal_buffer,
        .string_literal_slice = self.string_literal_slice,
        .string_literal = self.string_literal,
        .string_literal_is_ascii = self.string_literal_is_ascii,
        .is_ascii_only = self.is_ascii_only,
    };
}
/// Location (byte offset wrapped in logger.Loc) of the current token's start.
pub fn loc(self: *LexerType) logger.Loc {
    return logger.usize2Loc(self.start);
}
/// Report a generic syntax error at the current token start, then return
/// Error.SyntaxError so the caller unwinds.
pub fn syntaxError(self: *LexerType) !void {
    @setCold(true);

    self.addError(self.start, "Syntax Error!!", .{}, true);
    return Error.SyntaxError;
}

/// Report `msg` verbatim at the current token start, then return Error.SyntaxError.
pub fn addDefaultError(self: *LexerType, msg: []const u8) !void {
    @setCold(true);

    self.addError(self.start, "{s}", .{msg}, true);
    return Error.SyntaxError;
}

/// Report a formatted error at byte offset `_loc`, then return Error.SyntaxError.
pub fn addSyntaxError(self: *LexerType, _loc: usize, comptime fmt: []const u8, args: anytype) !void {
    @setCold(true);
    self.addError(_loc, fmt, args, false);
    return Error.SyntaxError;
}

/// Append a formatted error to the log at byte offset `_loc`.
/// No-op when logging is disabled or when an error was already reported at
/// the same location (dedup via `prev_error_loc`).
/// NOTE(review): the `panic` parameter is currently unused.
pub fn addError(self: *LexerType, _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
    @setCold(true);

    if (self.is_log_disabled) return;
    var __loc = logger.usize2Loc(_loc);
    if (__loc.eql(self.prev_error_loc)) {
        return;
    }

    self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable;
    self.prev_error_loc = __loc;
}

/// Append a formatted error covering range `r`, deduplicated by start location.
/// NOTE(review): `panic` is currently ignored (see commented-out tail) and the
/// returned value of log.addRangeError is discarded.
pub fn addRangeError(self: *LexerType, r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) !void {
    @setCold(true);

    if (self.is_log_disabled) return;
    if (self.prev_error_loc.eql(r.loc)) {
        return;
    }

    const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable;
    var msg = self.log.addRangeError(self.source, r, errorMessage);
    self.prev_error_loc = r.loc;

    // if (panic) {
    //     return Error.ParserError;
    // }
}
/// True when the current codepoint equals the ASCII byte `a`.
pub fn codePointEql(self: *LexerType, a: u8) bool {
    const wanted = @intCast(CodePoint, a);
    return self.code_point == wanted;
}
/// Look ahead at the next `n` codepoints without advancing the iterator.
/// If fewer than `n` codepoints are available, returns the remainder of the string.
/// NOTE(review): `nextCodepointSlice` also mutates `it.end`, which is NOT
/// restored here — confirm callers do not rely on `end` surviving a peek.
fn peek(it: *LexerType, n: usize) string {
    const saved_current = it.current;
    // Restore the cursor on every exit path so peeking never advances the lexer.
    defer it.current = saved_current;

    var stop_index = saved_current;
    var remaining = n;
    while (remaining > 0) : (remaining -= 1) {
        const codepoint_bytes = it.nextCodepointSlice();
        if (codepoint_bytes.len == 0) break;
        stop_index += codepoint_bytes.len;
    }

    return it.source.contents[saved_current..stop_index];
}
/// True for t_identifier and every token ordered after it in `T`
/// (i.e. identifiers and all keyword tokens).
pub fn isIdentifierOrKeyword(lexer: LexerType) bool {
    const identifier_threshold = @enumToInt(T.t_identifier);
    return @enumToInt(lexer.token) >= identifier_threshold;
}
/// Return the current string literal as newly-allocated UTF-16.
/// ASCII literals are converted from the raw source slice; non-ASCII
/// literals duplicate the already-decoded UTF-16 buffer.
pub fn stringLiteralUTF16(lexer: *LexerType) JavascriptString {
    if (!lexer.string_literal_is_ascii) {
        return lexer.allocator.dupe(u16, lexer.string_literal) catch unreachable;
    }
    return lexer.stringToUTF16(lexer.string_literal_slice);
}
pub fn deinit(this: *LexerType) void {}
/// Decode the escape sequences of `text[start..]`, appending UTF-16 code
/// units onto `buf_` (either std.ArrayList(u16) or the FakeArrayList16
/// shim — anything with append/appendAssumeCapacity/ensureUnusedCapacity).
/// Normalizes \r and \r\n to \n, handles legacy octal, \xNN, \uNNNN and
/// \u{...} escapes, and encodes astral codepoints as surrogate pairs.
/// In JSON mode (`is_json`), escapes that JSON forbids raise a syntax error.
/// Marks the lexer as non-ASCII since this is only reached on the slow path.
pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, comptime BufType: type, buf_: *BufType) !void {
    var buf = buf_.*;
    defer buf_.* = buf;
    lexer.is_ascii_only = false;

    var iterator = strings.CodepointIterator{ .bytes = text[start..], .i = 0 };
    var iter = strings.CodepointIterator.Cursor{};
    // NOTE(review): currently unused; presumably intended for error recovery.
    const start_length = buf.items.len;
    // Invariant used below: after iterator.next(&iter), iter.i points just
    // PAST the codepoint in iter.c (see the `iter.i - width` adjustments).
    while (iterator.next(&iter)) {
        const width = iter.width;
        switch (iter.c) {
            '\r' => {
                // From the specification:
                //
                // 11.8.6.1 Static Semantics: TV and TRV
                //
                // TV excludes the code units of LineContinuation while TRV includes
                // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
                // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
                // include a <CR> or <CR><LF> sequence.

                // Convert '\r\n' into '\n'
                if (iter.i < text.len and text[iter.i] == '\n') {
                    iter.i += 1;
                }

                // Convert '\r' into '\n'
                buf.append('\n') catch unreachable;
                continue;
            },

            '\\' => {
                _ = iterator.next(&iter) or return;
                const c2 = iter.c;
                const width2 = iter.width;
                switch (iter.c) {
                    // NOTE(review): these append the two ASCII bytes of the
                    // escape packed into one native-endian u16 rather than the
                    // decoded control character — confirm the printer expects
                    // escapes to stay escaped.
                    'b' => {
                        buf.append(std.mem.readIntNative(u16, "\\b")) catch unreachable;
                        continue;
                    },
                    'f' => {
                        buf.append(std.mem.readIntNative(u16, "\\f")) catch unreachable;
                        continue;
                    },
                    'n' => {
                        buf.append(std.mem.readIntNative(u16, "\\n")) catch unreachable;
                        continue;
                    },
                    'r' => {
                        buf.append(std.mem.readIntNative(u16, "\\r")) catch unreachable;
                        continue;
                    },
                    't' => {
                        buf.append(std.mem.readIntNative(u16, "\\t")) catch unreachable;
                        continue;
                    },
                    'v' => {
                        // \v is not valid JSON.
                        if (comptime is_json) {
                            lexer.end = start + iter.i - width2;
                            try lexer.syntaxError();
                        }

                        buf.append(std.mem.readIntNative(u16, "\\v")) catch unreachable;
                        continue;
                    },

                    // legacy octal literals
                    '0'...'7' => {
                        const octal_start = iter.i - 2;
                        if (comptime is_json) {
                            lexer.end = start + iter.i - width2;
                            try lexer.syntaxError();
                        }

                        // 1-3 digit octal
                        var is_bad = false;
                        var value: i64 = c2 - '0';
                        _ = iterator.next(&iter) or return lexer.syntaxError();

                        const c3: CodePoint = iter.c;
                        const width3 = iter.width;
                        switch (c3) {
                            '0'...'7' => {
                                value = value * 8 + c3 - '0';
                                iter.i += width3;
                                _ = iterator.next(&iter) or return lexer.syntaxError();

                                const c4 = iter.c;
                                const width4 = iter.width;
                                switch (c4) {
                                    '0'...'7' => {
                                        // Only consume a third digit if the result
                                        // still fits in a byte (octal max \377).
                                        const temp = value * 8 + c4 - '0';
                                        if (temp < 256) {
                                            value = temp;
                                            iter.i += width4;
                                        }
                                    },
                                    '8', '9' => {
                                        is_bad = true;
                                    },
                                    else => {},
                                }
                            },
                            '8', '9' => {
                                is_bad = true;
                            },
                            else => {},
                        }

                        iter.c = @intCast(i32, value);
                        if (is_bad) {
                            lexer.addRangeError(
                                logger.Range{ .loc = .{ .start = @intCast(i32, octal_start) }, .len = @intCast(i32, iter.i - octal_start) },
                                "Invalid legacy octal literal",
                                .{},
                                false,
                            ) catch unreachable;
                        }
                    },
                    '8', '9' => {
                        // \8 and \9 are just the digit itself.
                        iter.c = c2;
                    },

                    // 2-digit hexadecimal
                    'x' => {
                        if (comptime is_json) {
                            lexer.end = start + iter.i - width2;
                            try lexer.syntaxError();
                        }

                        var value: CodePoint = 0;
                        var c3: CodePoint = 0;
                        var width3: u3 = 0;

                        _ = iterator.next(&iter) or return lexer.syntaxError();
                        c3 = iter.c;
                        width3 = iter.width;
                        switch (c3) {
                            '0'...'9' => {
                                value = value * 16 | (c3 - '0');
                            },
                            'a'...'f' => {
                                value = value * 16 | (c3 + 10 - 'a');
                            },
                            'A'...'F' => {
                                value = value * 16 | (c3 + 10 - 'A');
                            },
                            else => {
                                lexer.end = start + iter.i - width3;
                                return lexer.syntaxError();
                            },
                        }

                        _ = iterator.next(&iter) or return lexer.syntaxError();
                        c3 = iter.c;
                        width3 = iter.width;
                        switch (c3) {
                            '0'...'9' => {
                                value = value * 16 | (c3 - '0');
                            },
                            'a'...'f' => {
                                value = value * 16 | (c3 + 10 - 'a');
                            },
                            'A'...'F' => {
                                value = value * 16 | (c3 + 10 - 'A');
                            },
                            else => {
                                lexer.end = start + iter.i - width3;
                                return lexer.syntaxError();
                            },
                        }

                        iter.c = value;
                    },

                    'u' => {
                        // We're going to make this an i64 so we don't risk integer overflows
                        // when people do weird things
                        var value: i64 = 0;

                        _ = iterator.next(&iter) or return lexer.syntaxError();
                        var c3 = iter.c;
                        var width3 = iter.width;

                        // variable-length \u{...}
                        if (c3 == '{') {
                            if (comptime is_json) {
                                lexer.end = start + iter.i - width2;
                                try lexer.syntaxError();
                            }

                            const hex_start = iter.i - width - width2 - width3;
                            var is_first = true;
                            var is_out_of_range = false;

                            variableLength: while (true) {
                                _ = iterator.next(&iter) or break :variableLength;
                                c3 = iter.c;

                                switch (c3) {
                                    '0'...'9' => {
                                        value = value * 16 | (c3 - '0');
                                    },
                                    'a'...'f' => {
                                        value = value * 16 | (c3 + 10 - 'a');
                                    },
                                    'A'...'F' => {
                                        value = value * 16 | (c3 + 10 - 'A');
                                    },
                                    '}' => {
                                        // "\u{}" with no digits is invalid.
                                        if (is_first) {
                                            lexer.end = start + iter.i - width3;
                                            return lexer.syntaxError();
                                        }
                                        break :variableLength;
                                    },
                                    else => {
                                        lexer.end = start + iter.i - width3;
                                        return lexer.syntaxError();
                                    },
                                }

                                // '\U0010FFFF
                                // copied from golang utf8.MaxRune
                                if (value > 1114111) {
                                    is_out_of_range = true;
                                }
                                is_first = false;
                            }

                            if (is_out_of_range) {
                                try lexer.addRangeError(
                                    .{ .loc = .{ .start = @intCast(i32, start + hex_start) }, .len = @intCast(i32, (iter.i - hex_start)) },
                                    "Unicode escape sequence is out of range",
                                    .{},
                                    true,
                                );
                                return;
                            }
                        } else {
                            // Fixed-length \uNNNN: exactly four hex digits.
                            var j: usize = 0;
                            while (j < 4) : (j += 1) {
                                switch (c3) {
                                    '0'...'9' => {
                                        value = value * 16 | (c3 - '0');
                                    },
                                    'a'...'f' => {
                                        value = value * 16 | (c3 + 10 - 'a');
                                    },
                                    'A'...'F' => {
                                        value = value * 16 | (c3 + 10 - 'A');
                                    },
                                    else => {
                                        lexer.end = start + iter.i - width3;
                                        return lexer.syntaxError();
                                    },
                                }

                                if (j < 3) {
                                    _ = iterator.next(&iter) or return lexer.syntaxError();
                                    c3 = iter.c;
                                    width3 = iter.width;
                                }
                            }
                        }

                        iter.c = @truncate(CodePoint, value);
                    },
                    '\r' => {
                        if (comptime is_json) {
                            lexer.end = start + iter.i - width2;
                            try lexer.syntaxError();
                        }

                        // Ignore line continuations. A line continuation is not an escaped newline.
                        // FIX: the next byte is text[iter.i], not text[iter.i + 1] —
                        // the cursor already points past the '\r' (same as the bare
                        // '\r' case above). The old "+ 1" skipped a byte and could
                        // index one past the end of `text`.
                        if (iter.i < text.len and text[iter.i] == '\n') {
                            // Make sure Windows CRLF counts as a single newline
                            iter.i += 1;
                        }
                        continue;
                    },
                    '\n', 0x2028, 0x2029 => {
                        if (comptime is_json) {
                            lexer.end = start + iter.i - width2;
                            try lexer.syntaxError();
                        }

                        // Ignore line continuations. A line continuation is not an escaped newline.
                        continue;
                    },
                    else => {
                        if (comptime is_json) {
                            // JSON only allows \" \\ \/ (plus the escapes handled above).
                            switch (c2) {
                                '"', '\\', '/' => {},
                                else => {
                                    lexer.end = start + iter.i - width2;
                                    try lexer.syntaxError();
                                },
                            }
                        }

                        // Any other escaped character is just itself.
                        iter.c = c2;
                    },
                }
            },
            else => {},
        }

        switch (iter.c) {
            -1 => return try lexer.addDefaultError("Unexpected end of file"),
            0...0xFFFF => {
                // BMP codepoint: single UTF-16 code unit.
                buf.append(@intCast(u16, iter.c)) catch unreachable;
            },
            else => {
                // Astral codepoint: encode as a UTF-16 surrogate pair.
                iter.c -= 0x10000;
                buf.ensureUnusedCapacity(2) catch unreachable;
                buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF)));
                buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF)));
            },
        }
    }
}
/// Result of the fast first scan over a string literal:
/// `suffix_len` — number of trailing delimiter code units to strip
/// (0 for unquoted .env values, 1 for a closing quote, 2 for "${").
/// `needs_slow_path` — true when escape decoding / UTF-16 conversion is required.
pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool };

/// First pass over a string/template literal body (note: name typo for
/// "Inner" is load-bearing; the caller uses it). Scans until the closing
/// `quote`, deciding whether the slow escape-decoding path is needed and
/// setting template tokens when "${" is found. `quote == 0` means an
/// implicitly-quoted .env value terminated by newline/EOF.
fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral {
    var needs_slow_path = false;
    var suffix_len: u3 = if (comptime quote == 0) 0 else 1;
    stringLiteral: while (true) {
        switch (lexer.code_point) {
            '\\' => {
                try lexer.step();

                // Handle Windows CRLF
                if (lexer.code_point == 'r' and comptime !is_json) {
                    try lexer.step();
                    if (lexer.code_point == '\n') {
                        try lexer.step();
                    }
                    continue :stringLiteral;
                }

                // JSON-in-JSON: tolerate a trailing escaped quote at EOF.
                if (comptime is_json and json_options.ignore_trailing_escape_sequences) {
                    if (lexer.code_point == quote and lexer.current >= lexer.source.contents.len) {
                        try lexer.step();

                        break;
                    }
                }

                switch (lexer.code_point) {
                    // Simple escapes can stay on the fast path.
                    'f', 't', 'r', 'n', '`', '\'', '0', '"', 0x2028, 0x2029 => {
                        try lexer.step();

                        continue :stringLiteral;
                    },
                    else => {
                        needs_slow_path = true;
                    },
                }
            },
            // This indicates the end of the file
            -1 => {
                if (comptime quote != 0) {
                    try lexer.addDefaultError("Unterminated string literal");
                }

                break :stringLiteral;
            },
            '\r' => {
                if (comptime quote != '`') {
                    try lexer.addDefaultError("Unterminated string literal");
                }

                // Template literals require newline normalization
                needs_slow_path = true;
            },
            '\n' => {
                // Implicitly-quoted strings end when they reach a newline OR end of file
                // This only applies to .env
                switch (comptime quote) {
                    0 => {
                        break :stringLiteral;
                    },
                    '`' => {},
                    else => {
                        try lexer.addDefaultError("Unterminated string literal");
                    },
                }
            },
            '$' => {
                if (comptime quote == '`') {
                    try lexer.step();
                    if (lexer.code_point == '{') {
                        // "${" starts a substitution: emit head/middle token.
                        suffix_len = 2;
                        try lexer.step();

                        if (lexer.rescan_close_brace_as_template_token) {
                            lexer.token = T.t_template_middle;
                        } else {
                            lexer.token = T.t_template_head;
                        }
                        break :stringLiteral;
                    }
                    continue :stringLiteral;
                }
            },
            // exit condition
            quote => {
                try lexer.step();

                break;
            },
            else => {
                // Non-ASCII strings need the slow path
                if (lexer.code_point >= 0x80) {
                    needs_slow_path = true;
                } else if (is_json and lexer.code_point < 0x20) {
                    // JSON forbids raw control characters inside strings.
                    try lexer.syntaxError();
                }
            },
        }
        try lexer.step();
    }

    return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len };
}
/// Lex a string/template literal delimited by `quote` ('"', '\'', '`', or 0
/// for unquoted .env values). Sets the token kind, records the literal's
/// source slice, and decodes escapes into `string_literal_buffer` only when
/// the fast ASCII path is insufficient.
pub fn parseStringLiteral(lexer: *LexerType, comptime quote: CodePoint) !void {
    if (comptime quote != '`') {
        lexer.token = T.t_string_literal;
    } else if (lexer.rescan_close_brace_as_template_token) {
        lexer.token = T.t_template_tail;
    } else {
        lexer.token = T.t_no_substitution_template_literal;
    }

    // quote is 0 when parsing JSON from .env
    // .env values may not always be quoted.
    try lexer.step();

    var string_literal_details = try lexer.parseStringLiteralInnter(quote);

    // Reset string literal
    // `base` skips the opening quote (absent when quote == 0);
    // `suffix_len` trims the closing quote or "${".
    const base = if (comptime quote == 0) lexer.start else lexer.start + 1;
    lexer.string_literal_slice = lexer.source.contents[base..@minimum(lexer.source.contents.len, lexer.end - string_literal_details.suffix_len)];
    lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path;
    lexer.string_literal_buffer.shrinkRetainingCapacity(0);
    if (string_literal_details.needs_slow_path) {
        lexer.string_literal_buffer.ensureUnusedCapacity(lexer.string_literal_slice.len) catch unreachable;
        try lexer.decodeEscapeSequences(0, lexer.string_literal_slice, @TypeOf(lexer.string_literal_buffer), &lexer.string_literal_buffer);
        lexer.string_literal = lexer.string_literal_buffer.items;
    }
    lexer.is_ascii_only = lexer.is_ascii_only and lexer.string_literal_is_ascii;

    if (comptime !FeatureFlags.allow_json_single_quotes) {
        if (quote == '\'' and is_json) {
            try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true);
        }
    }

    // for (text)
    // //  if (needs_slow_path) {
    // //      // Slow path

    // //      // lexer.string_literal = lexer.(lexer.start + 1, text);
    // //  } else {
    // //      // Fast path
    // //  }
}
/// Advance the cursor by one UTF-8 codepoint and return its byte slice,
/// or "" at end of input. Also updates `it.end` to the codepoint's start.
/// NOTE(review): reads `contents.ptr[it.current]` via the raw pointer before
/// the bounds check — appears to rely on the source buffer having a readable
/// byte past its length (e.g. a sentinel); confirm.
inline fn nextCodepointSlice(it: *LexerType) []const u8 {
    const cp_len = strings.utf8ByteSequenceLength(it.source.contents.ptr[it.current]);
    it.end = it.current;
    it.current += cp_len;

    return if (!(it.current > it.source.contents.len)) it.source.contents[it.current - cp_len .. it.current] else "";
}
/// Decode and consume the next UTF-8 codepoint; returns -1 at end of input.
/// Invalid UTF-8 traps via `catch unreachable` rather than erroring.
inline fn nextCodepoint(it: *LexerType) !CodePoint {
    const slice = it.nextCodepointSlice();

    return switch (slice.len) {
        0 => -1,
        1 => @as(CodePoint, slice[0]),
        2 => @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
        3 => @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
        4 => @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
        else => unreachable,
    };
}
/// Advance `code_point` to the next codepoint and keep the newline estimate
/// up to date.
inline fn step(lexer: *LexerType) !void {
    lexer.code_point = try lexer.nextCodepoint();

    // Track the approximate number of newlines in the file so we can preallocate
    // the line offset table in the printer for source maps. The line offset table
    // is the #1 highest allocation in the heap profile, so this is worth doing.
    // This count is approximate because it handles "\n" and "\r\n" (the common
    // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless
    // because it's only a preallocation. The array will just grow if it's too small.
    lexer.approximate_newline_count += @boolToInt(lexer.code_point == '\n');
}
/// Require the current token to be `token` (reporting via `expected`
/// otherwise), then advance to the next token.
pub inline fn expect(self: *LexerType, comptime token: T) !void {
    if (self.token != token) {
        try self.expected(token);
    }

    try self.next();
}

/// Automatic Semicolon Insertion: consume an explicit ';', or accept an
/// implicit one when the next token is preceded by a newline, is '}', or is
/// end-of-file; anything else is reported as a missing semicolon.
pub inline fn expectOrInsertSemicolon(lexer: *LexerType) !void {
    if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and
        lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file))
    {
        try lexer.expect(T.t_semicolon);
    }
}
/// Report `msg` as unsupported syntax at the current token end, then return
/// Error.SyntaxError.
pub fn addUnsupportedSyntaxError(self: *LexerType, msg: []const u8) !void {
    self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true);
    return Error.SyntaxError;
}
/// `private` marks "#name" private identifiers (leading '#' stripped later).
pub const IdentifierKind = enum { normal, private };
/// Token + decoded text produced by `scanIdentifierWithEscapes`.
pub const ScanResult = struct { token: T, contents: string };

// Scratch buffer for short escaped identifiers; thread-local so the lexer
// stays usable from multiple threads.
threadlocal var small_escape_sequence_buffer: [4096]u16 = undefined;
/// Minimal ArrayList(u16)-shaped adapter over a fixed stack buffer, so
/// `decodeEscapeSequences` can write into `small_escape_sequence_buffer`
/// without allocating. Capacity is fixed: overflow is a programmer error
/// caught by assertions (debug builds only).
const FakeArrayList16 = struct {
    items: []u16,
    i: usize = 0,

    pub fn append(fake: *FakeArrayList16, value: u16) !void {
        // FIX: assert there is room BEFORE writing (was inverted:
        // `items.len < i`, which could only hold after an overflow).
        std.debug.assert(fake.i < fake.items.len);
        fake.items[fake.i] = value;
        fake.i += 1;
    }

    pub fn appendAssumeCapacity(fake: *FakeArrayList16, value: u16) void {
        // FIX: same inverted bounds assertion as `append`.
        std.debug.assert(fake.i < fake.items.len);
        fake.items[fake.i] = value;
        fake.i += 1;
    }

    pub fn ensureUnusedCapacity(fake: *FakeArrayList16, int: anytype) !void {
        // FIX: `int` more items must fit within the fixed buffer
        // (was `items.len < i + int`, asserting the opposite).
        std.debug.assert(fake.i + int <= fake.items.len);
    }
};
// Lazily-initialized thread-local spill buffer for identifiers too large for
// `small_escape_sequence_buffer`.
threadlocal var large_escape_sequence_list: std.ArrayList(u16) = undefined;
threadlocal var large_escape_sequence_list_loaded: bool = false;

// This is an edge case that doesn't really exist in the wild, so it doesn't
// need to be as fast as possible.
/// Scan an identifier containing at least one \u escape. Two passes:
/// (1) step over the raw text validating the escape shapes, (2) decode it
/// with `decodeEscapeSequences` and validate the result as an identifier.
/// Escaped keywords yield t_escaped_keyword so they can't act as keywords.
pub fn scanIdentifierWithEscapes(lexer: *LexerType, comptime kind: IdentifierKind) anyerror!ScanResult {
    var result = ScanResult{ .token = .t_end_of_file, .contents = "" };

    // First pass: scan over the identifier to see how long it is
    while (true) {
        // Scan a unicode escape sequence. There is at least one because that's
        // what caused us to get on this slow path in the first place.
        if (lexer.code_point == '\\') {
            try lexer.step();
            if (lexer.code_point != 'u') {
                try lexer.syntaxError();
            }
            try lexer.step();
            if (lexer.code_point == '{') {
                // Variable-length \u{...}: any number of hex digits until '}'.
                try lexer.step();
                while (lexer.code_point != '}') {
                    switch (lexer.code_point) {
                        '0'...'9', 'a'...'f', 'A'...'F' => {
                            try lexer.step();
                        },
                        else => {
                            try lexer.syntaxError();
                        },
                    }
                }

                try lexer.step();
            } else {
                // Fixed-length \uNNNN: exactly four hex digits.
                // comptime var j: usize = 0;
                switch (lexer.code_point) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        try lexer.step();
                    },
                    else => {
                        try lexer.syntaxError();
                    },
                }
                switch (lexer.code_point) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        try lexer.step();
                    },
                    else => {
                        try lexer.syntaxError();
                    },
                }
                switch (lexer.code_point) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        try lexer.step();
                    },
                    else => {
                        try lexer.syntaxError();
                    },
                }
                switch (lexer.code_point) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        try lexer.step();
                    },
                    else => {
                        try lexer.syntaxError();
                    },
                }
            }
            continue;
        }

        if (!isIdentifierContinue(lexer.code_point)) {
            break;
        }
        try lexer.step();
    }

    // Second pass: re-use our existing escape sequence parser
    // Small identifiers use the fixed thread-local buffer; big ones fall back
    // to the (cached) heap-backed list.
    var original_text = lexer.raw();
    if (original_text.len < 1024) {
        var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 };
        try lexer.decodeEscapeSequences(lexer.start, original_text, FakeArrayList16, &buf);
        result.contents = lexer.utf16ToString(buf.items[0..buf.i]);
    } else {
        if (!large_escape_sequence_list_loaded) {
            large_escape_sequence_list = try std.ArrayList(u16).initCapacity(lexer.allocator, original_text.len);
            large_escape_sequence_list_loaded = true;
        }

        large_escape_sequence_list.shrinkRetainingCapacity(0);
        try lexer.decodeEscapeSequences(lexer.start, original_text, std.ArrayList(u16), &large_escape_sequence_list);
        result.contents = lexer.utf16ToString(large_escape_sequence_list.items);
    }

    // Private identifiers validate without their leading '#'.
    var identifier = result.contents;
    if (kind == .private) {
        identifier = result.contents[1..];
    }

    if (!isIdentifier(identifier)) {
        try lexer.addRangeError(
            .{ .loc = logger.usize2Loc(lexer.start), .len = @intCast(i32, lexer.end - lexer.start) },
            "Invalid identifier: \"{s}\"",
            .{result.contents},
            true,
        );
    }

    result.contents = identifier;

    // Escaped keywords are not allowed to work as actual keywords, but they are
    // allowed wherever we allow identifiers or keywords. For example:
    //
    //   // This is an error (equivalent to "var var;")
    //   var \u0076\u0061\u0072;
    //
    //   // This is an error (equivalent to "var foo;" except for this rule)
    //   \u0076\u0061\u0072 foo;
    //
    //   // This is an fine (equivalent to "foo.var;")
    //   foo.\u0076\u0061\u0072;
    //
    result.token = if (Keywords.has(result.contents)) .t_escaped_keyword else .t_identifier;
    // const text = lexer.decodeEscapeSequences(lexer.start, lexer.raw(), )
    return result;
}
/// Require the current token to be the contextual keyword `keyword`
/// (e.g. "of", "as"); reports an error otherwise, then advances.
/// Debug builds include the token kind in the message.
pub fn expectContextualKeyword(self: *LexerType, comptime keyword: string) !void {
    if (!self.isContextualKeyword(keyword)) {
        if (std.builtin.mode == std.builtin.Mode.Debug) {
            self.addError(self.start, "Expected \"{s}\" but found \"{s}\" (token: {s})", .{
                keyword,
                self.raw(),
                self.token,
            }, true);
        } else {
            self.addError(self.start, "Expected \"{s}\" but found \"{s}\"", .{ keyword, self.raw() }, true);
        }
        return Error.UnexpectedSyntax;
    }
    try self.next();
}
/// After a token has been re-split down to '=', merge it with the following
/// codepoint(s) into "=>", "==", or "===" when they are adjacent.
pub fn maybeExpandEquals(lexer: *LexerType) !void {
    const after_equals = lexer.code_point;

    if (after_equals == '>') {
        // "=" + ">" = "=>"
        lexer.token = .t_equals_greater_than;
        try lexer.step();
        return;
    }

    if (after_equals != '=') return;

    // "=" + "=" = "=="
    lexer.token = .t_equals_equals;
    try lexer.step();

    if (lexer.code_point == '=') {
        // "=" + "==" = "==="
        lexer.token = .t_equals_equals_equals;
        try lexer.step();
    }
}
/// Consume a single '<'. When the current token starts with '<' but is a
/// longer operator (TypeScript generics ambiguity), strip the leading '<'
/// and re-tokenize the remainder in place instead of advancing.
pub fn expectLessThan(lexer: *LexerType, comptime is_inside_jsx_element: bool) !void {
    switch (lexer.token) {
        // "<<" minus "<" leaves "<"
        .t_less_than_less_than => {
            lexer.token = .t_less_than;
            lexer.start += 1;
        },
        // "<<=" minus "<" leaves "<="
        .t_less_than_less_than_equals => {
            lexer.token = .t_less_than_equals;
            lexer.start += 1;
        },
        // "<=" minus "<" leaves "=", which may merge with what follows
        .t_less_than_equals => {
            lexer.token = .t_equals;
            lexer.start += 1;
            try lexer.maybeExpandEquals();
        },
        // Plain "<": actually advance the lexer
        .t_less_than => {
            if (comptime is_inside_jsx_element) {
                try lexer.nextInsideJSXElement();
            } else {
                try lexer.next();
            }
        },
        else => {
            try lexer.expected(.t_less_than);
        },
    }
}
/// Consume a single '>'. When the current token starts with '>' but is a
/// longer operator (TypeScript generics / JSX ambiguity), strip the leading
/// '>' and re-tokenize the remainder in place instead of advancing.
pub fn expectGreaterThan(lexer: *LexerType, comptime is_inside_jsx_element: bool) !void {
    switch (lexer.token) {
        .t_greater_than => {
            if (is_inside_jsx_element) {
                try lexer.nextInsideJSXElement();
            } else {
                try lexer.next();
            }
        },
        // ">>" minus ">" leaves ">"
        .t_greater_than_greater_than => {
            lexer.token = .t_greater_than;
            lexer.start += 1;
        },
        // ">>>" minus ">" leaves ">>"
        .t_greater_than_greater_than_greater_than => {
            lexer.token = .t_greater_than_greater_than;
            lexer.start += 1;
        },
        // ">=" minus ">" leaves "=", which may merge with what follows
        .t_greater_than_equals => {
            lexer.token = .t_equals;
            lexer.start += 1;
            try lexer.maybeExpandEquals();
        },
        // ">>=" minus ">" leaves ">="
        // FIX: previously produced t_greater_than_greater_than (">>"),
        // which drops the "=" — mirror expectLessThan's "<<=" -> "<=" case.
        .t_greater_than_greater_than_equals => {
            lexer.token = .t_greater_than_equals;
            lexer.start += 1;
        },
        // ">>>=" minus ">" leaves ">>="
        .t_greater_than_greater_than_greater_than_equals => {
            lexer.token = .t_greater_than_greater_than_equals;
            lexer.start += 1;
        },
        else => {
            try lexer.expected(.t_greater_than);
        },
    }
}
pub fn next(lexer: *LexerType) !void {
lexer.has_newline_before = lexer.end == 0;
while (true) {
lexer.start = lexer.end;
lexer.token = T.t_end_of_file;
switch (lexer.code_point) {
-1 => {
lexer.token = T.t_end_of_file;
},
'#' => {
if (lexer.start == 0 and lexer.source.contents[1] == '!') {
// "#!/usr/bin/env node"
lexer.token = .t_hashbang;
hashbang: while (true) {
try lexer.step();
switch (lexer.code_point) {
'\r', '\n', 0x2028, 0x2029 => {
break :hashbang;
},
-1 => {
break :hashbang;
},
else => {},
}
}
lexer.identifier = lexer.raw();
} else {
try lexer.step();
if (lexer.code_point == '\\') {
lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
lexer.token = T.t_private_identifier;
} else {
if (!isIdentifierStart(lexer.code_point)) {
try lexer.syntaxError();
}
try lexer.step();
while (isIdentifierContinue(lexer.code_point)) {
try lexer.step();
}
if (lexer.code_point == '\\') {
lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
lexer.token = T.t_private_identifier;
} else {
lexer.token = T.t_private_identifier;
lexer.identifier = lexer.raw();
}
break;
}
}
},
'\r', '\n', 0x2028, 0x2029 => {
try lexer.step();
lexer.has_newline_before = true;
continue;
},
'\t', ' ' => {
try lexer.step();
continue;
},
'(' => {
try lexer.step();
lexer.token = T.t_open_paren;
},
')' => {
try lexer.step();
lexer.token = T.t_close_paren;
},
'[' => {
try lexer.step();
lexer.token = T.t_open_bracket;
},
']' => {
try lexer.step();
lexer.token = T.t_close_bracket;
},
'{' => {
try lexer.step();
lexer.token = T.t_open_brace;
},
'}' => {
try lexer.step();
lexer.token = T.t_close_brace;
},
',' => {
try lexer.step();
lexer.token = T.t_comma;
},
':' => {
try lexer.step();
lexer.token = T.t_colon;
},
';' => {
try lexer.step();
lexer.token = T.t_semicolon;
},
'@' => {
try lexer.step();
lexer.token = T.t_at;
},
'~' => {
try lexer.step();
lexer.token = T.t_tilde;
},
'?' => {
// '?' or '?.' or '??' or '??='
try lexer.step();
switch (lexer.code_point) {
'?' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_question_question_equals;
},
else => {
lexer.token = T.t_question_question;
},
}
},
'.' => {
lexer.token = T.t_question;
const current = lexer.current;
const contents = lexer.source.contents;
// Lookahead to disambiguate with 'a?.1:b'
if (current < contents.len) {
const c = contents[current];
if (c < '0' or c > '9') {
try lexer.step();
lexer.token = T.t_question_dot;
}
}
},
else => {
lexer.token = T.t_question;
},
}
},
'%' => {
// '%' or '%='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_percent_equals;
},
else => {
lexer.token = T.t_percent;
},
}
},
'&' => {
// '&' or '&=' or '&&' or '&&='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_ampersand_equals;
},
'&' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_ampersand_ampersand_equals;
},
else => {
lexer.token = T.t_ampersand_ampersand;
},
}
},
else => {
lexer.token = T.t_ampersand;
},
}
},
'|' => {
// '|' or '|=' or '||' or '||='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_bar_equals;
},
'|' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_bar_bar_equals;
},
else => {
lexer.token = T.t_bar_bar;
},
}
},
else => {
lexer.token = T.t_bar;
},
}
},
'^' => {
// '^' or '^='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_caret_equals;
},
else => {
lexer.token = T.t_caret;
},
}
},
'+' => {
// '+' or '+=' or '++'
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_plus_equals;
},
'+' => {
try lexer.step();
lexer.token = T.t_plus_plus;
},
else => {
lexer.token = T.t_plus;
},
}
},
'-' => {
// '+' or '+=' or '++'
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_minus_equals;
},
'-' => {
try lexer.step();
if (lexer.code_point == '>' and lexer.has_newline_before) {
try lexer.step();
lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable;
singleLineHTMLCloseComment: while (true) {
switch (lexer.code_point) {
'\r', '\n', 0x2028, 0x2029 => {
break :singleLineHTMLCloseComment;
},
-1 => {
break :singleLineHTMLCloseComment;
},
else => {},
}
try lexer.step();
}
continue;
}
lexer.token = T.t_minus_minus;
},
else => {
lexer.token = T.t_minus;
},
}
},
'*' => {
// '*' or '*=' or '**' or '**='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = .t_asterisk_equals;
},
'*' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = .t_asterisk_asterisk_equals;
},
else => {
lexer.token = .t_asterisk_asterisk;
},
}
},
else => {
lexer.token = .t_asterisk;
},
}
},
'/' => {
// '/' or '/=' or '//' or '/* ... */'
try lexer.step();
if (lexer.for_global_name) {
lexer.token = .t_slash;
break;
}
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = .t_slash_equals;
},
'/' => {
singleLineComment: while (true) {
try lexer.step();
switch (lexer.code_point) {
'\r', '\n', 0x2028, 0x2029 => {
break :singleLineComment;
},
-1 => {
break :singleLineComment;
},
else => {},
}
}
if (comptime is_json) {
if (!json.allow_comments) {
try lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
return;
}
}
lexer.scanCommentText();
continue;
},
'*' => {
try lexer.step();
multiLineComment: while (true) {
switch (lexer.code_point) {
'*' => {
try lexer.step();
if (lexer.code_point == '/') {
try lexer.step();
break :multiLineComment;
}
},
'\r', '\n', 0x2028, 0x2029 => {
try lexer.step();
lexer.has_newline_before = true;
},
-1 => {
lexer.start = lexer.end;
try lexer.addSyntaxError(
lexer.start,
"Expected \"*/\" to terminate multi-line comment",
.{},
);
},
else => {
try lexer.step();
},
}
}
if (comptime is_json) {
if (!json.allow_comments) {
try lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
return;
}
}
lexer.scanCommentText();
continue;
},
else => {
lexer.token = .t_slash;
},
}
},
'=' => {
// '=' or '=>' or '==' or '==='
try lexer.step();
switch (lexer.code_point) {
'>' => {
try lexer.step();
lexer.token = T.t_equals_greater_than;
},
'=' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_equals_equals_equals;
},
else => {
lexer.token = T.t_equals_equals;
},
}
},
else => {
lexer.token = T.t_equals;
},
}
},
'<' => {
// '<' or '<<' or '<=' or '<<=' or '<!--'
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_less_than_equals;
},
'<' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_less_than_less_than_equals;
},
else => {
lexer.token = T.t_less_than_less_than;
},
}
},
// Handle legacy HTML-style comments
'!' => {
if (strings.eqlComptime(lexer.peek("--".len), "--")) {
try lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!");
return;
}
lexer.token = T.t_less_than;
},
else => {
lexer.token = T.t_less_than;
},
}
},
'>' => {
// '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_greater_than_equals;
},
'>' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_greater_than_greater_than_equals;
},
'>' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_greater_than_greater_than_greater_than_equals;
},
else => {
lexer.token = T.t_greater_than_greater_than_greater_than;
},
}
},
else => {
lexer.token = T.t_greater_than_greater_than;
},
}
},
else => {
lexer.token = T.t_greater_than;
},
}
},
'!' => {
// '!' or '!=' or '!=='
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
switch (lexer.code_point) {
'=' => {
try lexer.step();
lexer.token = T.t_exclamation_equals_equals;
},
else => {
lexer.token = T.t_exclamation_equals;
},
}
},
else => {
lexer.token = T.t_exclamation;
},
}
},
'\'' => {
try lexer.parseStringLiteral('\'');
},
'"' => {
try lexer.parseStringLiteral('"');
},
'`' => {
try lexer.parseStringLiteral('`');
},
'_', '$', 'a'...'z', 'A'...'Z' => {
try lexer.step();
while (isIdentifierContinue(lexer.code_point)) {
try lexer.step();
}
if (lexer.code_point != '\\') {
// this code is so hot that if you save lexer.raw() into a temporary variable
// it shows up in profiling
lexer.identifier = lexer.raw();
lexer.token = Keywords.get(lexer.identifier) orelse T.t_identifier;
} else {
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
}
},
'\\' => {
if (comptime is_json and json_options.ignore_leading_escape_sequences) {
if (lexer.start == 0 or lexer.current == lexer.source.contents.len - 1) {
try lexer.step();
continue;
}
}
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
},
'.', '0'...'9' => {
try lexer.parseNumericLiteralOrDot();
},
else => {
// Check for unusual whitespace characters
if (isWhitespace(lexer.code_point)) {
try lexer.step();
continue;
}
if (isIdentifierStart(lexer.code_point)) {
try lexer.step();
while (isIdentifierContinue(lexer.code_point)) {
try lexer.step();
}
if (lexer.code_point == '\\') {
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
} else {
lexer.token = T.t_identifier;
lexer.identifier = lexer.raw();
}
break;
}
lexer.end = lexer.current;
lexer.token = T.t_syntax_error;
},
}
return;
}
}
/// Report that `token` was expected but not found at the current position.
/// When logging is disabled (speculative / backtracking parse), signal
/// `error.Backtrack` instead of emitting a diagnostic.
pub fn expected(self: *LexerType, token: T) !void {
    if (self.is_log_disabled) {
        return error.Backtrack;
    }
    // Look the token's display name up once instead of twice.
    const name = tokenToString.get(token);
    if (name.len > 0) {
        try self.expectedString(name);
    } else {
        try self.unexpected();
    }
}
/// Emit an "Unexpected <token>" range error at the current token.
pub fn unexpected(lexer: *LexerType) !void {
    // Clamp start so raw() below cannot slice backwards.
    lexer.start = std.math.min(lexer.start, lexer.end);
    const found = if (lexer.start == lexer.source.contents.len)
        "end of file"
    else
        lexer.raw();
    try lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true);
}
/// The raw source text of the current token: contents[start..end].
pub fn raw(self: *LexerType) []const u8 {
    return self.source.contents[self.start..self.end];
}
/// True when the current token is the identifier `keyword`.
/// Contextual keywords (e.g. "as", "of") lex as plain identifiers.
pub fn isContextualKeyword(self: *LexerType, comptime keyword: string) bool {
    if (self.token != .t_identifier) return false;
    return strings.eqlComptime(self.raw(), keyword);
}
/// Emit an "Expected <text> but found <token>" range error.
pub fn expectedString(self: *LexerType, text: string) !void {
    const at_eof = self.source.contents.len == self.start;
    const found = if (at_eof) "end of file" else self.raw();
    try self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
}
/// Called after a comment has been scanned ([start, end) covers it, so the
/// text always begins with "//" or "/*"). Records the comment for later
/// printing when it must be preserved: either it carries a "/*!" preserve
/// annotation or the lexer was told to keep all comments before this token.
pub fn scanCommentText(lexer: *LexerType) void {
    var text = lexer.source.contents[lexer.start..lexer.end];
    // "/*!" or "//!" marks a legally-significant comment to preserve.
    const has_preserve_annotation = text.len > 2 and text[2] == '!';
    const is_multiline_comment = text[1] == '*';
    // Omit the trailing "*/" from the checks below
    // NOTE(review): endCommentText is currently unused — the trimming it was
    // meant for appears unimplemented (see the commented-out
    // removeMultilineCommentIndent call below). Confirm before removing.
    var endCommentText = text.len;
    if (is_multiline_comment) {
        endCommentText -= 2;
    }
    if (has_preserve_annotation or lexer.preserve_all_comments_before) {
        if (is_multiline_comment) {
            // text = lexer.removeMultilineCommentIndent(lexer.source.contents[0..lexer.start], text);
        }
        lexer.comments_to_preserve_before.append(js_ast.G.Comment{
            .text = text,
            .loc = lexer.loc(),
        }) catch unreachable;
    }
}
// TODO: implement this
// it's too complicated to handle all the edgecases right now given the state of Zig's standard library
/// Stub: should strip the common leading indentation from a multi-line
/// comment's interior lines; currently returns `text` unchanged.
pub fn removeMultilineCommentIndent(lexer: *LexerType, _prefix: string, text: string) string {
    return text;
}
/// The source range of the current token ([start, end)).
pub fn range(self: *LexerType) logger.Range {
    const token_len = std.math.lossyCast(i32, self.end - self.start);
    return logger.Range{ .loc = logger.usize2Loc(self.start), .len = token_len };
}
/// Create a lexer configured to scan a global name (for_global_name mode).
pub fn initGlobalName(log: *logger.Log, source: *const logger.Source, allocator: *std.mem.Allocator) !LexerType {
    var empty_string_literal: JavascriptString = &emptyJavaScriptString;
    var lex = LexerType{
        .allocator = allocator,
        .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
        .for_global_name = true,
        .log = log,
        .prev_error_loc = logger.Loc.Empty,
        .source = source,
        .string_literal = empty_string_literal,
        .string_literal_buffer = std.ArrayList(u16).init(allocator),
        .string_literal_is_ascii = true,
    };
    // Prime code_point, then scan the first token.
    try lex.step();
    try lex.next();
    return lex;
}
/// Create a lexer for tsconfig.json-style JSON (comments & trailing commas).
pub fn initTSConfig(log: *logger.Log, source: *const logger.Source, allocator: *std.mem.Allocator) !LexerType {
    var empty_string_literal: JavascriptString = &emptyJavaScriptString;
    var lex = LexerType{
        .allocator = allocator,
        .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
        .log = log,
        .prev_error_loc = logger.Loc.Empty,
        .source = source,
        .string_literal = empty_string_literal,
        .string_literal_buffer = std.ArrayList(u16).init(allocator),
        .string_literal_is_ascii = true,
    };
    // Prime code_point, then scan the first token.
    try lex.step();
    try lex.next();
    return lex;
}
/// Create a lexer for parsing plain JSON source.
pub fn initJSON(log: *logger.Log, source: *const logger.Source, allocator: *std.mem.Allocator) !LexerType {
    var empty_string_literal: JavascriptString = &emptyJavaScriptString;
    var lex = LexerType{
        .allocator = allocator,
        .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
        .log = log,
        .prev_error_loc = logger.Loc.Empty,
        .source = source,
        .string_literal = empty_string_literal,
        .string_literal_buffer = std.ArrayList(u16).init(allocator),
    };
    // Prime code_point, then scan the first token.
    try lex.step();
    try lex.next();
    return lex;
}
/// Create a lexer for JavaScript/TypeScript source. Also ensures the JSX
/// entity table is initialized since JS lexing may enter JSX mode.
pub fn init(log: *logger.Log, source: *const logger.Source, allocator: *std.mem.Allocator) !LexerType {
    try tables.initJSXEntityMap();
    var empty_string_literal: JavascriptString = &emptyJavaScriptString;
    var lex = LexerType{
        .allocator = allocator,
        .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
        .log = log,
        .prev_error_loc = logger.Loc.Empty,
        .source = source,
        .string_literal = empty_string_literal,
        .string_literal_buffer = std.ArrayList(u16).init(allocator),
    };
    // Prime code_point, then scan the first token.
    try lex.step();
    try lex.next();
    return lex;
}
/// Wrap the current string literal as an AST E.String node:
/// the UTF-8 slice when it is pure ASCII, the UTF-16 value otherwise.
pub fn toEString(lexer: *LexerType) js_ast.E.String {
    return if (lexer.string_literal_is_ascii)
        js_ast.E.String{ .utf8 = lexer.string_literal_slice }
    else
        js_ast.E.String{ .value = lexer.stringLiteralUTF16() };
}
/// Re-scan the token after "/" as a regular expression literal body plus
/// flags. The parser calls this once it knows a regex (not division) is
/// expected. On success, regex_flags_start holds the offset (within the
/// token) of the first flag character, or null if there are no flags.
pub fn scanRegExp(lexer: *LexerType) !void {
    lexer.regex_flags_start = null;
    while (true) {
        switch (lexer.code_point) {
            '/' => {
                // End of the pattern; scan the flag characters.
                try lexer.step();
                var has_set_flags_start = false;
                while (isIdentifierContinue(lexer.code_point)) {
                    switch (lexer.code_point) {
                        // NOTE(review): newer flags (e.g. 'd' hasIndices)
                        // are rejected here — confirm the intended set.
                        'g', 'i', 'm', 's', 'u', 'y' => {
                            if (!has_set_flags_start) {
                                lexer.regex_flags_start = @truncate(u16, lexer.end - lexer.start);
                                has_set_flags_start = true;
                            }
                            try lexer.step();
                        },
                        else => {
                            try lexer.syntaxError();
                        },
                    }
                }
                return;
            },
            '[' => {
                // Character class: "/" inside "[...]" does not end the regex.
                try lexer.step();
                while (lexer.code_point != ']') {
                    try lexer.scanRegExpValidateAndStep();
                }
                try lexer.step();
            },
            else => {
                try lexer.scanRegExpValidateAndStep();
            },
        }
    }
}
// TODO: use wtf-8 encoding.
/// Widen each byte of `str` into a newly allocated UTF-16 buffer
/// (no decoding — assumes single-byte code points). Caller owns the result.
pub fn stringToUTF16(lexer: *LexerType, str: string) JavascriptString {
    var buf: JavascriptString = lexer.allocator.alloc(u16, str.len) catch unreachable;
    for (str) |byte, index| {
        buf[index] = byte;
    }
    return buf;
}
// TODO: use wtf-8 encoding.
/// Convert UTF-16 to UTF-8. Currently delegates to utf16ToString (which does
/// not validate); kept as a separate entry point so validation can be added.
pub fn utf16ToStringWithValidation(lexer: *LexerType, js: JavascriptString) !string {
    // return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js);
    return utf16ToString(lexer, js);
}
/// Convert a UTF-16 string to WTF-8 bytes, combining surrogate pairs into
/// supplementary-plane code points. The returned slice is allocated with
/// lexer.allocator; caller owns it.
pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) string {
    var temp: [4]u8 = undefined;
    var list = std.ArrayList(u8).initCapacity(lexer.allocator, js.len) catch unreachable;
    var i: usize = 0;
    while (i < js.len) : (i += 1) {
        var r1 = @intCast(i32, js[i]);
        if (r1 >= 0xD800 and r1 <= 0xDBFF and i + 1 < js.len) {
            // BUG FIX: read the *next* code unit (js[i + 1]); the old code
            // read `js[i] + 1`, i.e. the same unit plus one.
            const r2 = @intCast(i32, js[i + 1]);
            if (r2 >= 0xDC00 and r2 <= 0xDFFF) {
                // Combine the surrogate pair. Use addition (not `|`) so the
                // 0x10000 offset carries correctly into the high bits.
                r1 = ((r1 - 0xD800) << 10) + (r2 - 0xDC00) + 0x10000;
                i += 1;
            }
        }
        const width = strings.encodeWTF8Rune(&temp, r1);
        list.appendSlice(temp[0..width]) catch unreachable;
    }
    return list.items;
    // return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable;
}
/// Scan the next token while positioned inside a JSX element tag (between
/// "<" and ">"), where the token set differs from regular JS: identifiers
/// may contain "-", namespaced names use ":", and strings use JSX rules.
pub fn nextInsideJSXElement(lexer: *LexerType) !void {
    lexer.has_newline_before = false;
    while (true) {
        lexer.start = lexer.end;
        lexer.token = .t_end_of_file;
        switch (lexer.code_point) {
            -1 => {
                lexer.token = .t_end_of_file;
            },
            '\r', '\n', 0x2028, 0x2029 => {
                try lexer.step();
                lexer.has_newline_before = true;
                continue;
            },
            '\t', ' ' => {
                try lexer.step();
                continue;
            },
            '.' => {
                try lexer.step();
                lexer.token = .t_dot;
            },
            '=' => {
                try lexer.step();
                lexer.token = .t_equals;
            },
            '{' => {
                try lexer.step();
                lexer.token = .t_open_brace;
            },
            '}' => {
                try lexer.step();
                lexer.token = .t_close_brace;
            },
            '<' => {
                try lexer.step();
                lexer.token = .t_less_than;
            },
            '>' => {
                try lexer.step();
                lexer.token = .t_greater_than;
            },
            '/' => {
                // '/' or '//' or '/* ... */'
                try lexer.step();
                switch (lexer.code_point) {
                    '/' => {
                        // Single-line comment: skip to the end of the line or EOF.
                        single_line_comment: {
                            while (true) {
                                try lexer.step();
                                switch (lexer.code_point) {
                                    '\r', '\n', 0x2028, 0x2029 => {
                                        break :single_line_comment;
                                    },
                                    -1 => {
                                        break :single_line_comment;
                                    },
                                    else => {},
                                }
                            }
                        }
                        continue;
                    },
                    '*' => {
                        try lexer.step();
                        // NOTE(review): start_range is computed but never used below.
                        const start_range = lexer.range();
                        multi_line_comment: {
                            while (true) {
                                switch (lexer.code_point) {
                                    '*' => {
                                        try lexer.step();
                                        if (lexer.code_point == '/') {
                                            try lexer.step();
                                            break :multi_line_comment;
                                        }
                                    },
                                    '\r', '\n', 0x2028, 0x2029 => {
                                        try lexer.step();
                                        lexer.has_newline_before = true;
                                    },
                                    -1 => {
                                        // Unterminated comment at EOF.
                                        lexer.start = lexer.end;
                                        try lexer.addSyntaxError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{});
                                    },
                                    else => {
                                        try lexer.step();
                                    },
                                }
                            }
                        }
                        continue;
                    },
                    else => {
                        lexer.token = .t_slash;
                    },
                }
            },
            '\'' => {
                try lexer.step();
                try lexer.parseJSXStringLiteral('\'');
            },
            '"' => {
                try lexer.step();
                try lexer.parseJSXStringLiteral('"');
            },
            else => {
                // Any other Unicode whitespace is skipped.
                if (isWhitespace(lexer.code_point)) {
                    try lexer.step();
                    continue;
                }
                if (isIdentifierStart(lexer.code_point)) {
                    try lexer.step();
                    // JSX identifiers may also contain "-" (e.g. "data-foo").
                    while (isIdentifierContinue(lexer.code_point) or lexer.code_point == '-') {
                        try lexer.step();
                    }
                    // Parse JSX namespaces. These are not supported by React or TypeScript
                    // but someone using JSX syntax in more obscure ways may find a use for
                    // them. A namespaced name is just always turned into a string so you
                    // can't use this feature to reference JavaScript identifiers.
                    if (lexer.code_point == ':') {
                        try lexer.step();
                        if (isIdentifierStart(lexer.code_point)) {
                            while (isIdentifierStart(lexer.code_point) or lexer.code_point == '-') {
                                try lexer.step();
                            }
                        } else {
                            try lexer.addSyntaxError(lexer.range().endI(), "Expected identifier after \"{s}\" in namespaced JSX name", .{lexer.raw()});
                        }
                    }
                    lexer.identifier = lexer.raw();
                    lexer.token = .t_identifier;
                    break;
                }
                lexer.end = lexer.current;
                lexer.token = .t_syntax_error;
            },
        }
        return;
    }
}
/// Scan a JSX attribute string literal (the opening quote has already been
/// consumed). JSX strings have no escape sequences; "&" entities and
/// non-ASCII bytes force the slow UTF-16 decode path.
pub fn parseJSXStringLiteral(lexer: *LexerType, comptime quote: u8) !void {
    // Range of the most recent backslash, kept so the parser can warn
    // about "\" immediately before the closing quote (not an escape in JSX).
    var backslash = logger.Range.None;
    var needs_decode = false;
    string_literal: while (true) {
        switch (lexer.code_point) {
            -1 => {
                // Unterminated string.
                try lexer.syntaxError();
            },
            '&' => {
                // Possible HTML entity; decode later.
                needs_decode = true;
                try lexer.step();
            },
            '\\' => {
                backslash = logger.Range{ .loc = logger.Loc{
                    .start = @intCast(i32, lexer.end),
                }, .len = 1 };
                try lexer.step();
                // Skip the `backslash` reset at the bottom of the loop.
                continue;
            },
            quote => {
                if (backslash.len > 0) {
                    // Extend the range to include the quote for the warning.
                    backslash.len += 1;
                    lexer.previous_backslash_quote_in_jsx = backslash;
                }
                try lexer.step();
                // not sure about this!
                break :string_literal;
            },
            else => {
                // Non-ASCII strings need the slow path
                if (lexer.code_point >= 0x80) {
                    needs_decode = true;
                } else if ((comptime is_json) and lexer.code_point < 0x20) {
                    // Control characters are forbidden in JSON strings.
                    try lexer.syntaxError();
                }
                try lexer.step();
            },
        }
        backslash = logger.Range.None;
    }
    lexer.token = .t_string_literal;
    // Strip the surrounding quotes.
    lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - 1];
    lexer.string_literal_is_ascii = !needs_decode;
    lexer.string_literal_buffer.clearRetainingCapacity();
    if (needs_decode) {
        lexer.string_literal_buffer.ensureTotalCapacity(lexer.string_literal_slice.len) catch unreachable;
        try lexer.decodeJSXEntities(lexer.string_literal_slice, &lexer.string_literal_buffer);
        lexer.string_literal = lexer.string_literal_buffer.items;
    }
}
/// Assert the current token is `token`, then advance in JSX-child mode.
pub fn expectJSXElementChild(lexer: *LexerType, token: T) !void {
    if (lexer.token != token) try lexer.expected(token);
    try lexer.nextJSXElementChild();
}
/// Scan the next token inside JSX element children (between ">" and "<"),
/// where everything that is not "{" or "<" is collapsed into a single
/// string literal token with JSX whitespace folding applied.
pub fn nextJSXElementChild(lexer: *LexerType) !void {
    lexer.has_newline_before = false;
    const original_start = lexer.end;
    while (true) {
        lexer.start = lexer.end;
        lexer.token = T.t_end_of_file;
        switch (lexer.code_point) {
            -1 => {
                lexer.token = .t_end_of_file;
            },
            '{' => {
                try lexer.step();
                lexer.token = .t_open_brace;
            },
            '<' => {
                try lexer.step();
                lexer.token = .t_less_than;
            },
            else => {
                // Everything up to the next "{" or "<" is raw text.
                var needs_fixing = false;
                string_literal: while (true) {
                    switch (lexer.code_point) {
                        -1 => {
                            try lexer.syntaxError();
                        },
                        '&', '\r', '\n', 0x2028, 0x2029 => {
                            // Entities and newlines require the slow path.
                            needs_fixing = true;
                            try lexer.step();
                        },
                        '{', '<' => {
                            break :string_literal;
                        },
                        else => {
                            // Non-ASCII strings need the slow path
                            needs_fixing = needs_fixing or lexer.code_point >= 0x80;
                            try lexer.step();
                        },
                    }
                }
                lexer.token = .t_string_literal;
                lexer.string_literal_slice = lexer.source.contents[original_start..lexer.end];
                lexer.string_literal_is_ascii = !needs_fixing;
                if (needs_fixing) {
                    // slow path
                    lexer.string_literal = try fixWhitespaceAndDecodeJSXEntities(lexer, lexer.string_literal_slice);
                    // Whitespace-only text is not a child at all; keep scanning.
                    if (lexer.string_literal.len == 0) {
                        lexer.has_newline_before = true;
                        continue;
                    }
                } else {
                    lexer.string_literal = &([_]u16{});
                }
            },
        }
        break;
    }
}
// Per-thread scratch buffer reused across calls (lazily created below).
threadlocal var jsx_decode_buf: std.ArrayList(u16) = undefined;
threadlocal var jsx_decode_init = false;
/// Apply JSX whitespace folding to `text` and decode HTML entities,
/// returning UTF-16: each line is trimmed, blank lines are dropped, and the
/// surviving lines are joined with single spaces. The returned slice aliases
/// the thread-local buffer — it is only valid until the next call.
pub fn fixWhitespaceAndDecodeJSXEntities(lexer: *LexerType, text: string) !JavascriptString {
    if (!jsx_decode_init) {
        jsx_decode_init = true;
        jsx_decode_buf = std.ArrayList(u16).init(default_allocator);
    }
    jsx_decode_buf.clearRetainingCapacity();
    var decoded = jsx_decode_buf;
    // Write back on exit so any growth is kept for the next call.
    defer jsx_decode_buf = decoded;
    var decoded_ptr = &decoded;
    var i: u32 = 0;
    var after_last_non_whitespace: ?u32 = null;
    // Trim whitespace off the end of the first line
    var first_non_whitespace: ?u32 = 0;
    while (i < text.len) {
        // Decode one UTF-8 sequence at a time.
        const width: u3 = strings.utf8ByteSequenceLength(text[i]);
        const c: CodePoint = switch (width) {
            0 => -1,
            1 => @intCast(CodePoint, text[i]),
            2 => @intCast(CodePoint, std.unicode.utf8Decode2(text[i..][0..2]) catch unreachable),
            3 => @intCast(CodePoint, std.unicode.utf8Decode3(text[i..][0..3]) catch unreachable),
            4 => @intCast(CodePoint, std.unicode.utf8Decode4(text[i..][0..4]) catch unreachable),
            else => unreachable,
        };
        switch (c) {
            '\r', '\n', 0x2028, 0x2029 => {
                if (first_non_whitespace != null and after_last_non_whitespace != null) {
                    // Newline
                    if (decoded.items.len > 0) {
                        try decoded.append(' ');
                    }
                    // Trim whitespace off the start and end of lines in the middle
                    try lexer.decodeJSXEntities(text[first_non_whitespace.?..after_last_non_whitespace.?], &decoded);
                }
                // Reset for the next line
                first_non_whitespace = null;
            },
            '\t', ' ' => {},
            else => {
                // Check for unusual whitespace characters
                if (!isWhitespace(@intCast(CodePoint, c))) {
                    after_last_non_whitespace = i + width;
                    if (first_non_whitespace == null) {
                        first_non_whitespace = i;
                    }
                }
            },
        }
        i += width;
    }
    // Flush the final (unterminated) line.
    if (first_non_whitespace) |start| {
        if (decoded.items.len > 0) {
            try decoded.append(' ');
        }
        try decodeJSXEntities(lexer, text[start..text.len], decoded_ptr);
    }
    return decoded.items;
}
/// Decode HTML entities ("&amp;", "&#65;", "&#x41;", ...) in UTF-8 `text`
/// and append the result to `out` as UTF-16, emitting surrogate pairs for
/// supplementary-plane code points. Unrecognized "&..." runs pass through
/// unchanged.
pub fn decodeJSXEntities(lexer: *LexerType, text: string, out: *std.ArrayList(u16)) !void {
    var i: usize = 0;
    while (i < text.len) {
        const width: u3 = strings.utf8ByteSequenceLength(text[i]);
        var c: CodePoint = switch (width) {
            0 => -1,
            1 => @intCast(CodePoint, text[i]),
            2 => @intCast(CodePoint, std.unicode.utf8Decode2(text[i..][0..2]) catch unreachable),
            3 => @intCast(CodePoint, std.unicode.utf8Decode3(text[i..][0..3]) catch unreachable),
            4 => @intCast(CodePoint, std.unicode.utf8Decode4(text[i..][0..4]) catch unreachable),
            else => unreachable,
        };
        i += width;
        if (c == '&') {
            if (strings.indexOfChar(text[i..text.len], ';')) |length| {
                const entity = text[i .. i + length];
                if (entity[0] == '#') {
                    // Numeric character reference: decimal, or hex with "#x".
                    var number = entity[1..entity.len];
                    var base: u8 = 10;
                    if (number.len > 1 and number[0] == 'x') {
                        number = number[1..number.len];
                        base = 16;
                    }
                    c = try std.fmt.parseInt(i32, number, base);
                    i += length + 1;
                } else if (tables.jsxEntity.get(entity)) |ent| {
                    // Named entity.
                    c = ent;
                    i += length + 1;
                }
            }
        }
        if (c <= 0xFFFF) {
            // Fits in a single UTF-16 code unit.
            try out.append(@intCast(u16, c));
        } else {
            // Encode as a UTF-16 surrogate pair.
            // BUG FIX: the surrogate bias is 0x10000, not 0x1000; the old
            // value produced corrupt pairs for astral code points.
            c -= 0x10000;
            try out.ensureUnusedCapacity(2);
            out.appendAssumeCapacity(@intCast(u16, 0xD800 + ((c >> 10) & 0x3FF)));
            out.appendAssumeCapacity(@intCast(u16, 0xDC00 + (c & 0x3FF)));
        }
    }
}
/// Assert the current token is `token`, then advance in JSX-element mode.
pub fn expectInsideJSXElement(lexer: *LexerType, token: T) !void {
    if (lexer.token != token) try lexer.expected(token);
    try lexer.nextInsideJSXElement();
}
/// Consume one regex character: a backslash escapes the following
/// character; newlines and EOF are invalid inside a regex literal.
fn scanRegExpValidateAndStep(lexer: *LexerType) !void {
    if (lexer.code_point == '\\') {
        try lexer.step();
    }
    switch (lexer.code_point) {
        // Neither line terminators nor EOF may appear in a regex literal.
        '\r', '\n', 0x2028, 0x2029, -1 => try lexer.syntaxError(),
        else => try lexer.step(),
    }
}
/// Re-interpret a "}" token as the continuation of a template literal
/// ("} ... `" or "} ... ${"): rewind one character and pretend the current
/// code point is "`" so next() re-enters the template string scanner.
pub fn rescanCloseBraceAsTemplateToken(lexer: *LexerType) !void {
    if (lexer.token != .t_close_brace) {
        try lexer.expected(.t_close_brace);
    }
    lexer.rescan_close_brace_as_template_token = true;
    // Fake a backtick so the string scanner runs, starting just before "}".
    lexer.code_point = '`';
    lexer.current = lexer.end;
    lexer.end -= 1;
    try lexer.next();
    lexer.rescan_close_brace_as_template_token = false;
}
/// Returns the raw text of the current template-literal token with <CR> and
/// <CR><LF> normalized to <LF>, per the "TV and TRV" static semantics
/// (ES2015 11.8.6.1). The delimiters ("`", "${", "}") are stripped.
/// Allocates only when a '\r' is present.
pub fn rawTemplateContents(lexer: *LexerType) string {
    // BUG FIX: default to "" so a non-template token yields a defined empty
    // result instead of reading an `undefined` slice below.
    var text: string = "";
    switch (lexer.token) {
        .t_no_substitution_template_literal, .t_template_tail => {
            // "`x`" or "}x`"
            text = lexer.source.contents[lexer.start + 1 .. lexer.end - 1];
        },
        .t_template_middle, .t_template_head => {
            // "}x${" or "`x${"
            text = lexer.source.contents[lexer.start + 1 .. lexer.end - 2];
        },
        else => {},
    }
    // Fast path: no carriage returns means nothing to normalize.
    if (strings.indexOfChar(text, '\r') == null) {
        return text;
    }
    // From the specification:
    //
    // 11.8.6.1 Static Semantics: TV and TRV
    //
    // TV excludes the code units of LineContinuation while TRV includes
    // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
    // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
    // include a <CR> or <CR><LF> sequence.
    var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable;
    var end: usize = 0;
    var i: usize = 0;
    var c: u8 = '0';
    while (i < bytes.list.items.len) {
        c = bytes.list.items[i];
        i += 1;
        if (c == '\r') {
            // Convert '\r\n' into '\n'
            if (i < bytes.list.items.len and bytes.list.items[i] == '\n') {
                i += 1;
            }
            // Convert '\r' into '\n'
            c = '\n';
        }
        bytes.list.items[end] = c;
        end += 1;
    }
    // NOTE(review): `end` is the number of bytes written, so `end + 1` looks
    // off by one unless toOwnedSliceLength expects length + 1 — verify
    // against MutableString before changing.
    return bytes.toOwnedSliceLength(end + 1);
}
/// Scan a token starting with '.' or a digit: ".", "...", or a numeric
/// literal (binary/octal/hex/legacy-octal integers, floats with exponents,
/// numeric separators, and bigints). Sets lexer.token and either
/// lexer.number (numeric value) or lexer.identifier (bigint text).
fn parseNumericLiteralOrDot(lexer: *LexerType) !void {
    // Number or dot;
    var first = lexer.code_point;
    try lexer.step();
    // Dot without a digit after it;
    if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) {
        // "..."
        if ((lexer.code_point == '.' and
            lexer.current < lexer.source.contents.len) and
            lexer.source.contents[lexer.current] == '.')
        {
            try lexer.step();
            try lexer.step();
            lexer.token = T.t_dot_dot_dot;
            return;
        }
        // "."
        lexer.token = T.t_dot;
        return;
    }
    var underscoreCount: usize = 0;
    var lastUnderscoreEnd: usize = 0;
    var hasDotOrExponent = first == '.';
    var base: f32 = 0.0;
    lexer.is_legacy_octal_literal = false;
    // Assume this is a number, but potentially change to a bigint later;
    lexer.token = T.t_numeric_literal;
    // Check for binary, octal, or hexadecimal literal;
    if (first == '0') {
        switch (lexer.code_point) {
            'b', 'B' => {
                base = 2;
            },
            'o', 'O' => {
                base = 8;
            },
            'x', 'X' => {
                base = 16;
            },
            '0'...'7', '_' => {
                // "0644"-style literal without an 'o' prefix.
                base = 8;
                lexer.is_legacy_octal_literal = true;
            },
            else => {},
        }
    }
    if (base != 0) {
        // Integer literal;
        var isFirst = true;
        var isInvalidLegacyOctalLiteral = false;
        lexer.number = 0;
        if (!lexer.is_legacy_octal_literal) {
            // Skip the base prefix character ('b', 'o', or 'x').
            try lexer.step();
        }
        integerLiteral: while (true) {
            switch (lexer.code_point) {
                '_' => {
                    // Cannot have multiple underscores in a row;
                    if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                        try lexer.syntaxError();
                    }
                    // The first digit must exist;
                    if (isFirst or lexer.is_legacy_octal_literal) {
                        try lexer.syntaxError();
                    }
                    lastUnderscoreEnd = lexer.end;
                    underscoreCount += 1;
                },
                '0', '1' => {
                    lexer.number = lexer.number * base + float64(lexer.code_point - '0');
                },
                '2', '3', '4', '5', '6', '7' => {
                    if (base == 2) {
                        try lexer.syntaxError();
                    }
                    lexer.number = lexer.number * base + float64(lexer.code_point - '0');
                },
                '8', '9' => {
                    if (lexer.is_legacy_octal_literal) {
                        // "08" forces a decimal re-parse below.
                        isInvalidLegacyOctalLiteral = true;
                    } else if (base < 10) {
                        try lexer.syntaxError();
                    }
                    lexer.number = lexer.number * base + float64(lexer.code_point - '0');
                },
                'A', 'B', 'C', 'D', 'E', 'F' => {
                    if (base != 16) {
                        try lexer.syntaxError();
                    }
                    lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A');
                },
                'a', 'b', 'c', 'd', 'e', 'f' => {
                    if (base != 16) {
                        try lexer.syntaxError();
                    }
                    lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'a');
                },
                else => {
                    // The first digit must exist;
                    if (isFirst) {
                        try lexer.syntaxError();
                    }
                    break :integerLiteral;
                },
            }
            try lexer.step();
            isFirst = false;
        }
        var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent;
        // Slow path: do we need to re-scan the input as text?
        if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) {
            var text = lexer.raw();
            // Can't use a leading zero for bigint literals;
            if (isBigIntegerLiteral and lexer.is_legacy_octal_literal) {
                try lexer.syntaxError();
            }
            // Filter out underscores;
            if (underscoreCount > 0) {
                var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable;
                var i: usize = 0;
                for (text) |char| {
                    if (char != '_') {
                        bytes[i] = char;
                        i += 1;
                    }
                }
                // BUG FIX: use the filtered copy; previously `bytes` was
                // built and then discarded, leaving underscores in `text`
                // (corrupting stored bigint text and the float re-parse).
                text = bytes;
            }
            // Store bigints as text to avoid precision loss;
            if (isBigIntegerLiteral) {
                lexer.identifier = text;
            } else if (isInvalidLegacyOctalLiteral) {
                // Legacy octal with '8'/'9' is actually decimal; re-parse.
                if (std.fmt.parseFloat(f64, text)) |num| {
                    lexer.number = num;
                } else |err| {
                    try lexer.addSyntaxError(lexer.start, "Invalid number {s}", .{text});
                }
            }
        }
    } else {
        // Floating-point literal;
        var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9');
        // Initial digits;
        while (true) {
            if (lexer.code_point < '0' or lexer.code_point > '9') {
                if (lexer.code_point != '_') {
                    break;
                }
                // Cannot have multiple underscores in a row;
                if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                    try lexer.syntaxError();
                }
                // The specification forbids underscores in this case;
                if (isInvalidLegacyOctalLiteral) {
                    try lexer.syntaxError();
                }
                lastUnderscoreEnd = lexer.end;
                underscoreCount += 1;
            }
            try lexer.step();
        }
        // Fractional digits;
        if (first != '.' and lexer.code_point == '.') {
            // An underscore must not come last;
            if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                lexer.end -= 1;
                try lexer.syntaxError();
            }
            hasDotOrExponent = true;
            try lexer.step();
            if (lexer.code_point == '_') {
                try lexer.syntaxError();
            }
            while (true) {
                if (lexer.code_point < '0' or lexer.code_point > '9') {
                    if (lexer.code_point != '_') {
                        break;
                    }
                    // Cannot have multiple underscores in a row;
                    if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                        try lexer.syntaxError();
                    }
                    lastUnderscoreEnd = lexer.end;
                    underscoreCount += 1;
                }
                try lexer.step();
            }
        }
        // Exponent;
        if (lexer.code_point == 'e' or lexer.code_point == 'E') {
            // An underscore must not come last;
            if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                lexer.end -= 1;
                try lexer.syntaxError();
            }
            hasDotOrExponent = true;
            try lexer.step();
            if (lexer.code_point == '+' or lexer.code_point == '-') {
                try lexer.step();
            }
            if (lexer.code_point < '0' or lexer.code_point > '9') {
                try lexer.syntaxError();
            }
            while (true) {
                if (lexer.code_point < '0' or lexer.code_point > '9') {
                    if (lexer.code_point != '_') {
                        break;
                    }
                    // Cannot have multiple underscores in a row;
                    if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
                        try lexer.syntaxError();
                    }
                    lastUnderscoreEnd = lexer.end;
                    underscoreCount += 1;
                }
                try lexer.step();
            }
        }
        // Take a slice of the text to parse;
        var text = lexer.raw();
        // Filter out underscores;
        if (underscoreCount > 0) {
            var i: usize = 0;
            if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| {
                for (text) |char| {
                    if (char != '_') {
                        bytes[i] = char;
                        i += 1;
                    }
                }
                text = bytes;
            } else |err| {
                try lexer.addSyntaxError(lexer.start, "Out of Memory Wah Wah Wah", .{});
                return;
            }
        }
        if (lexer.code_point == 'n' and !hasDotOrExponent) {
            // The only bigint literal that can start with 0 is "0n"
            if (text.len > 1 and first == '0') {
                try lexer.syntaxError();
            }
            // Store bigints as text to avoid precision loss;
            lexer.identifier = text;
        } else if (!hasDotOrExponent and lexer.end - lexer.start < 10) {
            // Parse a 32-bit integer (very fast path);
            var number: u32 = 0;
            for (text) |c| {
                number = number * 10 + @intCast(u32, c - '0');
            }
            lexer.number = @intToFloat(f64, number);
        } else {
            // Parse a double-precision floating-point number;
            if (std.fmt.parseFloat(f64, text)) |num| {
                lexer.number = num;
            } else |err| {
                try lexer.addSyntaxError(lexer.start, "Invalid number", .{});
            }
        }
    }
    // An underscore must not come last;
    if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
        lexer.end -= 1;
        try lexer.syntaxError();
    }
    // Handle bigint literals after the underscore-at-end check above;
    if (lexer.code_point == 'n' and !hasDotOrExponent) {
        lexer.token = T.t_big_integer_literal;
        try lexer.step();
    }
    // Identifiers can't occur immediately after numbers;
    if (isIdentifierStart(lexer.code_point)) {
        try lexer.syntaxError();
    }
}
};
}
/// The default lexer: plain JavaScript/TypeScript, no JSON options.
pub const Lexer = NewLexer(.{});
const JSIdentifier = @import("./js_lexer/identifier.zig");
// Unicode identifier classification backed by precomputed bitsets.
pub const isIdentifierStart = JSIdentifier.Bitset.isIdentifierStart;
pub const isIdentifierContinue = JSIdentifier.Bitset.isIdentifierPart;
/// True for ECMAScript WhiteSpace code points: ASCII whitespace
/// (excluding '\n'/'\r', which are line terminators), NBSP, BOM, and the
/// Unicode "Space_Separator" category.
pub fn isWhitespace(codepoint: CodePoint) bool {
    switch (codepoint) {
        // character tabulation, line tabulation, form feed, space
        0x0009, 0x000B, 0x000C, 0x0020 => return true,
        // no-break space, zero width non-breaking space (BOM)
        0x00A0, 0xFEFF => return true,
        // Unicode "Space_Separator" code points:
        // ogham space mark; en quad through hair space;
        // narrow no-break space; medium mathematical space; ideographic space
        0x1680, 0x2000...0x200A, 0x202F, 0x205F, 0x3000 => return true,
        else => return false,
    }
}
/// True when `text` (UTF-8) is a valid JavaScript identifier: a valid
/// identifier-start code point followed only by identifier-continue ones.
pub fn isIdentifier(text: string) bool {
    if (text.len == 0) {
        return false;
    }
    var it = strings.CodepointIterator{ .bytes = text, .i = 0 };
    var cursor = strings.CodepointIterator.Cursor{};
    var is_first = true;
    while (it.next(&cursor)) {
        const ok = if (is_first) isIdentifierStart(cursor.c) else isIdentifierContinue(cursor.c);
        if (!ok) {
            return false;
        }
        is_first = false;
    }
    // An empty decode (no code points produced) is not an identifier.
    return !is_first;
}
/// Iterates over the UTF-8 code points of `bytes`, tracking the width and
/// value of the most recently decoded code point.
pub const CodepointIterator = struct {
    bytes: []const u8,
    i: usize,
    width: u3 = 0, // byte width of the last decoded sequence (0 at end)
    c: CodePoint = 0, // last decoded code point

    /// Advance past one UTF-8 sequence and return its bytes.
    /// Returns an empty slice (and sets width = 0) at end of input.
    pub fn nextCodepointSlice(it: *CodepointIterator) []const u8 {
        @setRuntimeSafety(false);
        // BUG FIX: guard the index. The original read it.bytes[it.i]
        // unconditionally, which is out of bounds (UB with runtime safety
        // disabled) once the iterator is exhausted.
        if (it.i >= it.bytes.len) {
            it.width = 0;
            return "";
        }
        const cp_len = strings.utf8ByteSequenceLength(it.bytes[it.i]);
        it.i += cp_len;
        // without branching,
        const slice = if (!(it.i > it.bytes.len)) it.bytes[it.i - cp_len .. it.i] else "";
        it.width = @truncate(u3, slice.len);
        return slice;
    }

    /// Decode and return the next code point, or null at end of input.
    pub fn nextCodepoint(it: *CodepointIterator) ?CodePoint {
        const slice = it.nextCodepointSlice();
        it.c = switch (it.width) {
            0 => it.c,
            1 => @as(CodePoint, slice[0]),
            2 => @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
            3 => @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
            4 => @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
            else => unreachable,
        };
        return if (slice.len > 0) it.c else null;
    }

    /// Look ahead at the next n codepoints without advancing the iterator.
    /// If fewer than n codepoints are available, then return the remainder of the string.
    pub fn peek(it: *CodepointIterator, n: usize) []const u8 {
        const original_i = it.i;
        defer it.i = original_i;
        var end_ix = original_i;
        var found: usize = 0;
        while (found < n) : (found += 1) {
            // BUG FIX: nextCodepointSlice returns a plain slice, not an
            // optional, so the original `orelse` here could never compile.
            const next_codepoint = it.nextCodepointSlice();
            if (next_codepoint.len == 0) return it.bytes[original_i..];
            end_ix += next_codepoint.len;
        }
        return it.bytes[original_i..end_ix];
    }
};
/// True when the UTF-16 string `text` is a valid JavaScript identifier.
/// Surrogate pairs are combined into supplementary-plane code points
/// before classification.
pub fn isIdentifierUTF16(text: []const u16) bool {
    if (text.len == 0) {
        return false;
    }
    var i: usize = 0;
    while (i < text.len) : (i += 1) {
        const at_start = i == 0;
        var cp = @as(CodePoint, text[i]);
        // Combine a valid high/low surrogate pair into one code point.
        if (cp >= 0xD800 and cp <= 0xDBFF and i + 1 < text.len) {
            const low = @as(CodePoint, text[i + 1]);
            if (low >= 0xDC00 and low <= 0xDFFF) {
                cp = (cp << 10) + low + (0x10000 - (0xD800 << 10) - 0xDC00);
                i += 1;
            }
        }
        const ok = if (at_start) isIdentifierStart(cp) else isIdentifierContinue(cp);
        if (!ok) {
            return false;
        }
    }
    return true;
}
// TODO: implement this to actually work right
// this fn is a stub!
/// Best-effort: compute the source range of the identifier starting at
/// `loc` (including "#private" names and "\u{...}" escapes). Falls back to
/// a zero-length range when no identifier is found.
pub fn rangeOfIdentifier(source: *const Source, loc: logger.Loc) logger.Range {
    const contents = source.contents;
    if (loc.start == -1 or @intCast(usize, loc.start) >= contents.len) return logger.Range.None;
    const iter = strings.CodepointIterator.init(contents[loc.toUsize()..]);
    var cursor = strings.CodepointIterator.Cursor{};
    var r = logger.Range{ .loc = loc, .len = 0 };
    if (iter.bytes.len == 0) {
        return r;
    }
    const text = iter.bytes;
    const end = @intCast(u32, text.len);
    if (!iter.next(&cursor)) return r;
    // Handle private names
    if (cursor.c == '#') {
        if (!iter.next(&cursor)) {
            r.len = 1;
            return r;
        }
    }
    if (isIdentifierStart(cursor.c) or cursor.c == '\\') {
        // On every exit path, the range length is how far the cursor got.
        defer r.len = @intCast(i32, cursor.i);
        while (iter.next(&cursor)) {
            if (cursor.c == '\\') {
                // Search for the end of the identifier
                // Skip over bracketed unicode escapes such as "\u{10000}"
                if (cursor.i + 2 < end and text[cursor.i + 1] == 'u' and text[cursor.i + 2] == '{') {
                    cursor.i += 2;
                    while (cursor.i < end) {
                        if (text[cursor.i] == '}') {
                            cursor.i += 1;
                            break;
                        }
                        cursor.i += 1;
                    }
                }
            } else if (!isIdentifierContinue(cursor.c)) {
                return r;
            }
        }
    }
    // const offset = @intCast(usize, loc.start);
    // var i: usize = 0;
    // for (text) |c| {
    //     if (isIdentifierStart(@as(CodePoint, c))) {
    //         for (source.contents[offset + i ..]) |c_| {
    //             if (!isIdentifierContinue(c_)) {
    //                 r.len = std.math.lossyCast(i32, i);
    //                 return r;
    //             }
    //             i += 1;
    //         }
    //     }
    //     i += 1;
    // }
    return r;
}
/// Convert an integer (e.g. a digit's code point value) to f64,
/// the representation used for numeric literal values.
inline fn float64(num: anytype) f64 {
    return @intToFloat(f64, num);
}
// Sanity checks for the identifier classification helpers: control
// characters and line separators are rejected, ':' never continues an
// identifier, '$' is valid.
test "isIdentifier" {
    const expect = std.testing.expect;
    try expect(!isIdentifierStart(0x2029));
    try expect(!isIdentifierStart(0));
    try expect(!isIdentifierStart(1));
    try expect(!isIdentifierStart(2));
    try expect(!isIdentifierStart(3));
    try expect(!isIdentifierStart(4));
    try expect(!isIdentifierStart(5));
    try expect(!isIdentifierStart(6));
    try expect(!isIdentifierStart(7));
    try expect(!isIdentifierStart(8));
    try expect(!isIdentifierStart(9));
    try expect(!isIdentifierStart(0x2028));
    try expect(!isIdentifier("\\u2028"));
    try expect(!isIdentifier("\\u2029"));
    try expect(!isIdentifierContinue(':'));
    try expect(!isIdentifier("javascript:"));
    try expect(isIdentifier("javascript"));
    try expect(!isIdentifier(":2"));
    try expect(!isIdentifier("2:"));
    try expect(isIdentifier("$"));
    try expect(!isIdentifier("$:"))
}