Former-commit-id: 4d6a8f598a
This commit is contained in:
Jarred Sumner
2021-05-18 20:06:08 -07:00
parent 7396fae4e2
commit 2fac623977
7 changed files with 789 additions and 89 deletions

View File

@@ -22,6 +22,7 @@ pub const isWindows = std.Target.current.os.tag == .windows;
pub const enableTracing = true;
pub const isDebug = std.builtin.Mode.Debug == std.builtin.mode;
pub const isTest = std.builtin.is_test;
pub const Output = struct {
var source: *Source = undefined;

View File

@@ -1058,16 +1058,20 @@ pub const Stmt = struct {
}
pub fn empty() Stmt {
return Stmt.init(&Stmt.None, logger.Loc.Empty);
return Stmt.init(Stmt.None, logger.Loc.Empty);
}
var None = S.Empty{};
pub fn init(origData: anytype, loc: logger.Loc) Stmt {
if (@typeInfo(@TypeOf(origData)) != .Pointer) {
if (@typeInfo(@TypeOf(origData)) != .Pointer and @TypeOf(origData) != S.Empty) {
@compileError("Stmt.init needs a pointer.");
}
if (@TypeOf(origData) == S.Empty) {
return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } };
}
switch (@TypeOf(origData.*)) {
S.Block => {
return Stmt.comptime_init("s_block", S.Block, origData, loc);
@@ -1210,7 +1214,7 @@ pub const Stmt = struct {
return Stmt.comptime_alloc(allocator, "s_do_while", S.DoWhile, origData, loc);
},
S.Empty => {
return Stmt.comptime_alloc(allocator, "s_empty", S.Empty, origData, loc);
return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } };
},
S.Enum => {
return Stmt.comptime_alloc(allocator, "s_enum", S.Enum, origData, loc);
@@ -1336,7 +1340,7 @@ pub const Stmt = struct {
s_debugger: *S.Debugger,
s_directive: *S.Directive,
s_do_while: *S.DoWhile,
s_empty: *S.Empty,
s_empty: S.Empty,
s_enum: *S.Enum,
s_export_clause: *S.ExportClause,
s_export_default: *S.ExportDefault,
@@ -1382,7 +1386,12 @@ pub const Stmt = struct {
pub const Expr = struct {
loc: logger.Loc,
data: Data,
/// Replace this expression with a no-op placeholder (`E.Missing`),
/// preserving the original source location for diagnostics.
/// Does not mutate the receiver, hence the const pointer.
pub fn toEmpty(expr: *const Expr) Expr {
return Expr{ .data = .{ .e_missing = E.Missing{} }, .loc = expr.loc };
}
/// Whether this expression is the `E.Missing` placeholder (as produced by
/// `toEmpty`). Read-only, hence the const pointer.
pub fn isEmpty(expr: *const Expr) bool {
return std.meta.activeTag(expr.data) == .e_missing;
}
pub const Query = struct { expr: Expr, loc: logger.Loc };
pub fn getProperty(expr: *const Expr, name: string) ?Query {
@@ -1829,9 +1838,7 @@ pub const Expr = struct {
return Expr{ .loc = loc, .data = Data{ .e_jsx_element = dat } };
},
E.Missing => {
var dat = allocator.create(E.Missing) catch unreachable;
dat.* = st;
return Expr{ .loc = loc, .data = Data{ .e_missing = dat } };
return Expr{ .loc = loc, .data = Data{ .e_missing = E.Missing{} } };
},
E.Number => {
var dat = allocator.create(E.Number) catch unreachable;
@@ -2460,7 +2467,7 @@ pub const Expr = struct {
e_import_identifier: *E.ImportIdentifier,
e_private_identifier: *E.PrivateIdentifier,
e_jsx_element: *E.JSXElement,
e_missing: *E.Missing,
e_missing: E.Missing,
e_number: *E.Number,
e_big_int: *E.BigInt,
e_object: *E.Object,

View File

@@ -191,25 +191,258 @@ pub const Lexer = struct {
}
}
fn parseStringLiteral(lexer: *LexerType) !void {
var quote: CodePoint = lexer.code_point;
var needs_slow_path = false;
var suffixLen: usize = 1;
pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, buf: anytype) !void {
var iter = CodepointIterator{ .bytes = text[start..], .i = 0 };
const start_length = buf.items.len;
while (iter.nextCodepoint()) |c| {
const width = iter.width;
if (quote != '`') {
lexer.token = T.t_string_literal;
} else if (lexer.rescan_close_brace_as_template_token) {
lexer.token = T.t_template_tail;
} else {
lexer.token = T.t_no_substitution_template_literal;
switch (c) {
'\r' => {
// From the specification:
//
// 11.8.6.1 Static Semantics: TV and TRV
//
// TV excludes the code units of LineContinuation while TRV includes
// them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
// <LF> for both TV and TRV. An explicit EscapeSequence is needed to
// include a <CR> or <CR><LF> sequence.
// Convert '\r\n' into '\n'
if (iter.i < text.len and text[iter.i] == '\n') {
iter.i += 1;
}
// Convert '\r' into '\n'
buf.append('\n') catch unreachable;
continue;
},
'\\' => {
const c2 = iter.nextCodepoint() orelse return;
const width2 = iter.width;
switch (c2) {
'b' => {
buf.append(std.mem.readIntNative(u16, "\\b")) catch unreachable;
continue;
},
'f' => {
buf.append(std.mem.readIntNative(u16, "\\f")) catch unreachable;
continue;
},
'n' => {
buf.append(std.mem.readIntNative(u16, "\\n")) catch unreachable;
continue;
},
'r' => {
buf.append(std.mem.readIntNative(u16, "\\r")) catch unreachable;
continue;
},
't' => {
buf.append(std.mem.readIntNative(u16, "\\t")) catch unreachable;
continue;
},
'v' => {
if (lexer.json_options != null) {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
}
buf.append(std.mem.readIntNative(u16, "\\v")) catch unreachable;
continue;
},
'0'...'7' => {
try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported.");
},
'8', '9' => {
try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported.");
},
'x' => {
if (lexer.json_options != null) {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
}
var value: CodePoint = 0;
var c3: CodePoint = 0;
var width3: u3 = 0;
comptime var j: usize = 0;
inline while (j < 2) : (j += 1) {
c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
width3 = iter.width;
switch (c3) {
'0'...'9' => {
value = value * 16 | (c3 - '0');
},
'a'...'f' => {
value = value * 16 | (c3 + 10 - 'a');
},
'A'...'F' => {
value = value * 16 | (c3 + 10 - 'A');
},
else => {
lexer.end = start + iter.i - width3;
return lexer.syntaxError();
},
}
}
iter.c = value;
},
'u' => {
// We're going to make this an i64 so we don't risk integer overflows
// when people do weird things
var value: i64 = 0;
var c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
var width3 = iter.width;
// variable-length
if (c3 == '{') {
if (lexer.json_options != null) {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
}
const hex_start = iter.i - width - width2 - width3;
var is_first = true;
var is_out_of_range = false;
variableLength: while (true) {
c3 = iter.nextCodepoint() orelse break :variableLength;
switch (c3) {
'0'...'9' => {
value = value * 16 | (c3 - '0');
},
'a'...'f' => {
value = value * 16 | (c3 + 10 - 'a');
},
'A'...'F' => {
value = value * 16 | (c3 + 10 - 'A');
},
'}' => {
if (is_first) {
lexer.end = start + iter.i - width3;
return lexer.syntaxError();
}
break :variableLength;
},
else => {
lexer.end = start + iter.i - width3;
return lexer.syntaxError();
},
}
// '\U0010FFFF
// copied from golang utf8.MaxRune
if (value > 1114111) {
is_out_of_range = true;
}
is_first = false;
}
if (is_out_of_range) {
try lexer.addRangeError(
.{ .loc = .{ .start = @intCast(i32, start + hex_start) }, .len = @intCast(i32, (iter.i - hex_start)) },
"Unicode escape sequence is out of range",
.{},
true,
);
return;
}
// fixed-length
} else {
// Fixed-length
comptime var j: usize = 0;
inline while (j < 4) : (j += 1) {
switch (c3) {
'0'...'9' => {
value = value * 16 | (c3 - '0');
},
'a'...'f' => {
value = value * 16 | (c3 + 10 - 'a');
},
'A'...'F' => {
value = value * 16 | (c3 + 10 - 'A');
},
else => {
lexer.end = start + iter.i - width3;
return lexer.syntaxError();
},
}
if (j < 3) {
c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
width3 = iter.width;
}
}
}
iter.c = @truncate(CodePoint, value);
},
'\r' => {
if (lexer.json_options != null) {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
}
// Ignore line continuations. A line continuation is not an escaped newline.
if (iter.i < text.len and text[iter.i + 1] == '\n') {
// Make sure Windows CRLF counts as a single newline
iter.i += 1;
}
continue;
},
'\n', 0x2028, 0x2029 => {
if (lexer.json_options != null) {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
}
// Ignore line continuations. A line continuation is not an escaped newline.
continue;
},
else => {
if (lexer.json_options != null) {
switch (c2) {
'"', '\\', '/' => {},
else => {
lexer.end = start + iter.i - width2;
try lexer.syntaxError();
},
}
}
iter.c = c2;
},
}
},
else => {},
}
if (iter.c <= 0xFFFF) {
buf.append(@intCast(u16, c)) catch unreachable;
} else {
iter.c -= 0x10000;
buf.ensureUnusedCapacity(2) catch unreachable;
buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF)));
buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF)));
}
}
try lexer.step();
}
pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool };
fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral {
var needs_slow_path = false;
var suffix_len: u3 = 1;
stringLiteral: while (true) {
switch (lexer.code_point) {
'\\' => {
needs_slow_path = true;
try lexer.step();
// Skip slow path for \n in a string literal
// This is pretty common, shows up in e.g. React
// Example code: array.split("\n")
// We don't need to decode as UTF16 for that. We know it's just a newline char.
needs_slow_path = lexer.code_point != 'n';
// Handle Windows CRLF
if (lexer.code_point == '\r' and lexer.json_options != null) {
@@ -245,7 +478,7 @@ pub const Lexer = struct {
if (quote == '`') {
try lexer.step();
if (lexer.code_point == '{') {
suffixLen = 2;
suffix_len = 2;
try lexer.step();
if (lexer.rescan_close_brace_as_template_token) {
lexer.token = T.t_template_middle;
@@ -257,12 +490,15 @@ pub const Lexer = struct {
continue :stringLiteral;
}
},
// exit condition
quote => {
try lexer.step();
break;
},
else => {
if (quote == lexer.code_point) {
try lexer.step();
break :stringLiteral;
}
// Non-ASCII strings need the slow path
if (lexer.code_point >= 0x80) {
needs_slow_path = true;
@@ -274,19 +510,41 @@ pub const Lexer = struct {
try lexer.step();
}
return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len };
}
fn parseStringLiteral(lexer: *LexerType) !void {
var quote: CodePoint = lexer.code_point;
if (quote != '`') {
lexer.token = T.t_string_literal;
} else if (lexer.rescan_close_brace_as_template_token) {
lexer.token = T.t_template_tail;
} else {
lexer.token = T.t_no_substitution_template_literal;
}
try lexer.step();
var string_literal_details = switch (quote) {
'`' => try lexer.parseStringLiteralInnter('`'),
'\'' => try lexer.parseStringLiteralInnter('\''),
'"' => try lexer.parseStringLiteralInnter('"'),
else => unreachable,
};
// Reset string literal
lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen];
lexer.string_literal_is_ascii = !needs_slow_path;
lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - string_literal_details.suffix_len];
lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path;
lexer.string_literal_buffer.shrinkRetainingCapacity(0);
if (needs_slow_path) {
lexer.string_literal_buffer.ensureTotalCapacity(lexer.string_literal_slice.len) catch unreachable;
var slice = lexer.string_literal_buffer.allocatedSlice();
lexer.string_literal_buffer.items = slice[0..strings.toUTF16Buf(lexer.string_literal_slice, slice)];
if (string_literal_details.needs_slow_path) {
lexer.string_literal_buffer.ensureUnusedCapacity(lexer.string_literal_slice.len) catch unreachable;
try lexer.decodeEscapeSequences(0, lexer.string_literal_slice, &lexer.string_literal_buffer);
}
if (quote == '\'' and lexer.json_options != null) {
try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true);
}
// for (text)
// // if (needs_slow_path) {
// // // Slow path
@@ -333,8 +591,131 @@ pub const Lexer = struct {
return Error.SyntaxError;
}
pub fn scanIdentifierWithEscapes(self: *LexerType) !void {
try self.addUnsupportedSyntaxError("escape sequence");
pub const IdentifierKind = enum { normal, private };
pub const ScanResult = struct { token: T, contents: string };
threadlocal var small_escape_sequence_buffer: [4096]u16 = undefined;
// A minimal ArrayList(u16) look-alike backed by a caller-supplied fixed
// buffer, so `decodeEscapeSequences` can write into a thread-local buffer
// through the same `buf.append(...)` interface it uses for a real ArrayList.
//
// BUG FIX: the debug assertions were inverted (`items.len < i`), which meant
// they failed on the very first append in debug builds. They now check that
// the write stays inside the backing buffer.
const FakeArrayList16 = struct {
items: []u16,
i: usize = 0,
/// Append one code unit; asserts (debug builds only) that capacity remains.
pub fn append(fake: *FakeArrayList16, value: u16) !void {
std.debug.assert(fake.i < fake.items.len);
fake.items[fake.i] = value;
fake.i += 1;
}
/// Same as `append`; capacity was supposedly ensured by the caller.
pub fn appendAssumeCapacity(fake: *FakeArrayList16, value: u16) void {
std.debug.assert(fake.i < fake.items.len);
fake.items[fake.i] = value;
fake.i += 1;
}
/// No-op capacity check: asserts `int` more items fit in the fixed buffer.
pub fn ensureUnusedCapacity(fake: *FakeArrayList16, int: anytype) !void {
std.debug.assert(fake.i + int <= fake.items.len);
}
};
threadlocal var large_escape_sequence_list: std.ArrayList(u16) = undefined;
threadlocal var large_escape_sequence_list_loaded: bool = false;
// This is an edge case that doesn't really exist in the wild, so it doesn't
// need to be as fast as possible.
// Slow path for identifiers that contain "\u..." escape sequences.
// Scans to the end of the identifier, decodes the escapes to UTF-16 via
// `decodeEscapeSequences`, validates that the decoded text is still a legal
// identifier, and classifies the token: decoded keywords become
// .t_escaped_keyword (not the keyword token itself — see the note near the
// bottom). For `kind == .private` the leading '#' is stripped before
// validation.
pub fn scanIdentifierWithEscapes(lexer: *LexerType, comptime kind: IdentifierKind) !ScanResult {
var result = ScanResult{ .token = .t_end_of_file, .contents = "" };
// First pass: scan over the identifier to see how long it is
while (true) {
// Scan a unicode escape sequence. There is at least one because that's
// what caused us to get on this slow path in the first place.
if (lexer.code_point == '\\') {
try lexer.step();
if (lexer.code_point != 'u') {
try lexer.syntaxError();
}
try lexer.step();
if (lexer.code_point == '{') {
// Variable-length escape, e.g. "\u{10000}": hex digits until '}'.
try lexer.step();
while (lexer.code_point != '}') {
switch (lexer.code_point) {
'0'...'9', 'a'...'f', 'A'...'F' => {
try lexer.step();
},
else => {
try lexer.syntaxError();
},
}
}
try lexer.step();
} else {
// Fixed-length escape: exactly four hex digits, e.g. "\u0076".
comptime var j: usize = 0;
inline while (j < 4) : (j += 1) {
switch (lexer.code_point) {
'0'...'9', 'a'...'f', 'A'...'F' => {
try lexer.step();
},
else => {
try lexer.syntaxError();
},
}
}
}
continue;
}
if (!isIdentifierContinue(lexer.code_point)) {
break;
}
try lexer.step();
}
// Second pass: re-use our existing escape sequence parser
var original_text = lexer.raw();
if (original_text.len < 1024) {
// Short identifiers decode into a fixed thread-local buffer (no allocation).
var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 };
try lexer.decodeEscapeSequences(lexer.start, original_text, &buf);
result.contents = lexer.utf16ToString(buf.items[0..buf.i]);
} else {
// Longer identifiers fall back to a lazily-initialized thread-local list.
if (!large_escape_sequence_list_loaded) {
large_escape_sequence_list = try std.ArrayList(u16).initCapacity(lexer.allocator, original_text.len);
large_escape_sequence_list_loaded = true;
}
large_escape_sequence_list.shrinkRetainingCapacity(0);
try lexer.decodeEscapeSequences(lexer.start, original_text, &large_escape_sequence_list);
result.contents = lexer.utf16ToString(large_escape_sequence_list.items);
}
var identifier = result.contents;
if (kind == .private) {
// Private names begin with '#'; drop it before validating the identifier.
identifier = result.contents[1..];
}
if (!isIdentifier(identifier)) {
try lexer.addRangeError(
.{ .loc = logger.usize2Loc(lexer.start), .len = @intCast(i32, lexer.end - lexer.start) },
"Invalid identifier: \"{s}\"",
.{result.contents},
true,
);
}
result.contents = identifier;
// Escaped keywords are not allowed to work as actual keywords, but they are
// allowed wherever we allow identifiers or keywords. For example:
//
// // This is an error (equivalent to "var var;")
// var \u0076\u0061\u0072;
//
// // This is an error (equivalent to "var foo;" except for this rule)
// \u0076\u0061\u0072 foo;
//
// // This is fine (equivalent to "foo.var;")
// foo.\u0076\u0061\u0072;
//
result.token = if (Keywords.has(result.contents)) .t_escaped_keyword else .t_identifier;
// const text = lexer.decodeEscapeSequences(lexer.start, lexer.raw(), )
return result;
}
pub fn debugInfo(self: *LexerType) void {
@@ -462,31 +843,46 @@ pub const Lexer = struct {
'#' => {
if (lexer.start == 0 and lexer.source.contents[1] == '!') {
try lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet.");
return;
}
try lexer.step();
if (!isIdentifierStart(lexer.code_point)) {
try lexer.syntaxError();
}
try lexer.step();
if (isIdentifierStart(lexer.code_point)) {
try lexer.step();
while (isIdentifierContinue(lexer.code_point)) {
// "#!/usr/bin/env node"
lexer.token = .t_hashbang;
hashbang: while (true) {
try lexer.step();
switch (lexer.code_point) {
'\r', '\n', 0x2028, 0x2029 => {
break :hashbang;
},
-1 => {
break :hashbang;
},
else => {},
}
}
lexer.identifier = lexer.raw();
} else {
try lexer.step();
if (lexer.code_point == '\\') {
try lexer.scanIdentifierWithEscapes();
const scan_result = try lexer.scanIdentifierWithEscapes(.private);
lexer.identifier = scan_result.contents;
lexer.token = T.t_private_identifier;
// lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
} else {
lexer.token = T.t_private_identifier;
lexer.identifier = lexer.raw();
if (!isIdentifierStart(lexer.code_point)) {
try lexer.syntaxError();
}
try lexer.step();
while (isIdentifierContinue(lexer.code_point)) {
try lexer.step();
}
if (lexer.code_point == '\\') {
const scan_result = try lexer.scanIdentifierWithEscapes(.private);
lexer.identifier = scan_result.contents;
lexer.token = T.t_private_identifier;
} else {
lexer.token = T.t_private_identifier;
lexer.identifier = lexer.raw();
}
break;
}
break;
}
},
'\r', '\n', 0x2028, 0x2029 => {
@@ -966,7 +1362,9 @@ pub const Lexer = struct {
}
if (lexer.code_point == '\\') {
try lexer.scanIdentifierWithEscapes();
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
} else {
const contents = lexer.raw();
lexer.identifier = contents;
@@ -975,8 +1373,9 @@ pub const Lexer = struct {
},
'\\' => {
// TODO: normal
try lexer.scanIdentifierWithEscapes();
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
},
'.', '0'...'9' => {
@@ -996,8 +1395,9 @@ pub const Lexer = struct {
try lexer.step();
}
if (lexer.code_point == '\\') {
// lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
} else {
lexer.token = T.t_identifier;
lexer.identifier = lexer.raw();
@@ -2143,26 +2543,114 @@ pub fn isIdentifierUTF16(text: JavascriptString) bool {
return true;
}
/// Forward iterator over the Unicode codepoints of a UTF-8 byte slice.
/// NOTE(review): assumes the input is valid UTF-8 — every decode error is
/// swallowed with `catch unreachable`, which is a panic in safe builds and
/// undefined behavior in ReleaseFast. Confirm callers only feed it
/// already-validated source text.
pub const CodepointIterator = struct {
bytes: []const u8,
// Byte offset of the next codepoint to decode.
i: usize,
// Width in bytes (1-4) of the most recently decoded codepoint.
width: u3 = 0,
// The most recently decoded codepoint.
c: CodePoint = 0,
/// Advance past the next codepoint and return its raw bytes, or null at end.
pub fn nextCodepointSlice(it: *CodepointIterator) ?[]const u8 {
if (it.i >= it.bytes.len) {
return null;
}
const cp_len = std
.unicode.utf8ByteSequenceLength(it.bytes[it.i]) catch unreachable;
it.i += cp_len;
return it.bytes[it.i - cp_len .. it.i];
}
/// Decode and return the next codepoint (also stored in `it.c` / `it.width`),
/// or null at end of input.
pub fn nextCodepoint(it: *CodepointIterator) ?CodePoint {
const slice = it.nextCodepointSlice() orelse return null;
it.width = @intCast(u3, slice.len);
it.c = switch (it.width) {
1 => @intCast(CodePoint, slice[0]),
2 => @intCast(CodePoint, std.unicode.utf8Decode2(slice) catch unreachable),
3 => @intCast(CodePoint, std.unicode.utf8Decode3(slice) catch unreachable),
4 => @intCast(CodePoint, std.unicode.utf8Decode4(slice) catch unreachable),
else => unreachable,
};
return it.c;
}
/// Look ahead at the next n codepoints without advancing the iterator.
/// If fewer than n codepoints are available, then return the remainder of the string.
pub fn peek(it: *CodepointIterator, n: usize) []const u8 {
// `nextCodepointSlice` only mutates `i`; the defer rolls it back so the
// iterator position is unchanged on every return path.
const original_i = it.i;
defer it.i = original_i;
var end_ix = original_i;
var found: usize = 0;
while (found < n) : (found += 1) {
const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..];
end_ix += next_codepoint.len;
}
return it.bytes[original_i..end_ix];
}
};
// TODO: implement this to actually work right
// this fn is a stub!
pub fn rangeOfIdentifier(source: *const Source, loc: logger.Loc) logger.Range {
const text = source.contents[loc.toUsize()..];
var r = logger.Range{ .loc = loc, .len = 0 };
const offset = @intCast(usize, loc.start);
var i: usize = 0;
for (source.contents[offset..]) |c| {
if (isIdentifierStart(@as(CodePoint, c))) {
for (source.contents[offset + i ..]) |c_| {
if (!isIdentifierContinue(c_)) {
r.len = std.math.lossyCast(i32, i);
return r;
if (text.len == 0) {
return r;
}
var iter = CodepointIterator{ .bytes = text, .i = 0 };
var c = @intCast(CodePoint, iter.nextCodepoint() orelse unreachable);
// Handle private names
if (c == '#') {
c = @intCast(CodePoint, iter.nextCodepoint() orelse {
r.len = 1;
return r;
});
}
if (isIdentifierStart(c) or c == '\\') {
defer r.len = @intCast(i32, iter.i);
while (iter.nextCodepoint()) |code_point| {
if (code_point == '\\') {
// Search for the end of the identifier
// Skip over bracketed unicode escapes such as "\u{10000}"
if (iter.i + 2 < text.len and text[iter.i + 1] == 'u' and text[iter.i + 2] == '{') {
iter.i += 2;
while (iter.i < text.len) {
if (text[iter.i] == '}') {
iter.i += 1;
break;
}
iter.i += 1;
}
}
i += 1;
} else if (!isIdentifierContinue(code_point)) {
return r;
}
}
i += 1;
}
// const offset = @intCast(usize, loc.start);
// var i: usize = 0;
// for (text) |c| {
// if (isIdentifierStart(@as(CodePoint, c))) {
// for (source.contents[offset + i ..]) |c_| {
// if (!isIdentifierContinue(c_)) {
// r.len = std.math.lossyCast(i32, i);
// return r;
// }
// i += 1;
// }
// }
// i += 1;
// }
return r;
}

View File

@@ -201,7 +201,7 @@ pub const StrictModeReservedWords = std.ComptimeStringMap(bool, .{
.{ "yield", true },
});
pub const CodePoint = i22;
pub const CodePoint = i32;
pub const PropertyModifierKeyword = enum {
p_abstract,

View File

@@ -520,6 +520,155 @@ pub const SideEffects = enum {
}
}
// Trim the parts of `expr` whose value is unused, keeping only what may have
// side effects. Returns null when the whole expression can be discarded, or a
// (possibly smaller) replacement expression otherwise.
// NOTE(review): "simpify" is a typo for "simplify"; the name is kept because
// call sites elsewhere in this file use it.
pub fn simpifyUnusedExpr(p: *P, expr: Expr) ?Expr {
switch (expr.data) {
// Pure values: dropping them can never change program behavior.
.e_null, .e_undefined, .e_missing, .e_boolean, .e_number, .e_big_int, .e_string, .e_this, .e_reg_exp, .e_function, .e_arrow, .e_import_meta => {
return null;
},
.e_dot => |dot| {
// Property accesses flagged as side-effect free can be dropped.
if (dot.can_be_removed_if_unused) {
return null;
}
},
.e_identifier => |ident| {
if (ident.must_keep_due_to_with_stmt) {
return expr;
}
// Bound identifiers can't throw a ReferenceError, so they can go.
if (ident.can_be_removed_if_unused or p.symbols.items[ident.ref.inner_index].kind != .unbound) {
return null;
}
},
.e_if => |__if__| {
// Simplify both arms in place, then drop the ternary entirely if
// neither arm has side effects.
__if__.yes = simpifyUnusedExpr(p, __if__.yes) orelse __if__.yes.toEmpty();
__if__.no = simpifyUnusedExpr(p, __if__.no) orelse __if__.no.toEmpty();
// "foo() ? 1 : 2" => "foo()"
if (__if__.yes.isEmpty() and __if__.no.isEmpty()) {
return simpifyUnusedExpr(p, __if__.test_);
}
},
.e_call => |call| {
// A call that has been marked "__PURE__" can be removed if all arguments
// can be removed. The annotation causes us to ignore the target.
if (call.can_be_unwrapped_if_unused) {
return Expr.joinAllWithComma(call.args, p.allocator);
}
},
.e_binary => |bin| {
switch (bin.op) {
// We can simplify "==" and "!=" even though they can call "toString" and/or
// "valueOf" if we can statically determine that the types of both sides are
// primitives. In that case there won't be any chance for user-defined
// "toString" and/or "valueOf" to be called.
.bin_loose_eq, .bin_loose_ne => {
if (isPrimitiveWithSideEffects(bin.left.data) and isPrimitiveWithSideEffects(bin.right.data)) {
return Expr.joinWithComma(simpifyUnusedExpr(p, bin.left) orelse bin.left.toEmpty(), simpifyUnusedExpr(p, bin.right) orelse bin.right.toEmpty(), p.allocator);
}
},
else => {},
}
},
.e_new => |call| {
// A constructor call that has been marked "__PURE__" can be removed if all arguments
// can be removed. The annotation causes us to ignore the target.
if (call.can_be_unwrapped_if_unused) {
return Expr.joinAllWithComma(call.args, p.allocator);
}
},
else => {},
}
// Default: keep the expression unchanged.
return expr;
}
// If this is in a dead branch, then we want to trim as much dead code as we
// can. Everything can be trimmed except for hoisted declarations ("var" and
// "function"), which affect the parent scope. For example:
//
// function foo() {
// if (false) { var x; }
// x = 1;
// }
//
// We can't trim the entire branch as dead or calling foo() will incorrectly
// assign to a global variable instead.
// The main goal here is to trim conditionals
// Returns true when `stmt` must be preserved even inside dead control flow
// (e.g. the body of "if (false) { ... }"), because hoisted "var"
// declarations escape the dead branch and affect the enclosing scope.
// Compound statements recurse so only the hoisted parts survive.
pub fn shouldKeepStmtInDeadControlFlow(stmt: Stmt) bool {
switch (stmt.data) {
.s_empty, .s_expr, .s_throw, .s_return, .s_break, .s_continue, .s_class, .s_debugger => {
// Omit these statements entirely
return false;
},
.s_local => |local| {
// Only "var" declarations hoist out of the dead branch and must be
// kept; "let"/"const" are block-scoped and can be dropped.
// BUG FIX: this previously returned `local.kind != .k_var`, which
// kept let/const and discarded var — the opposite of the intent
// documented in the (removed) commented-out code here.
return local.kind == .k_var;
},
.s_block => |block| {
// Keep the block only if it contains something worth keeping.
for (block.stmts) |child| {
if (shouldKeepStmtInDeadControlFlow(child)) {
return true;
}
}
return false;
},
.s_if => |_if_| {
if (shouldKeepStmtInDeadControlFlow(_if_.yes)) {
return true;
}
const no = _if_.no orelse return false;
return shouldKeepStmtInDeadControlFlow(no);
},
.s_while => |__while__| {
return shouldKeepStmtInDeadControlFlow(__while__.body);
},
.s_do_while => |__while__| {
return shouldKeepStmtInDeadControlFlow(__while__.body);
},
.s_for => |__for__| {
if (__for__.init) |init_| {
if (shouldKeepStmtInDeadControlFlow(init_)) {
return true;
}
}
return shouldKeepStmtInDeadControlFlow(__for__.body);
},
.s_for_in => |__for__| {
return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body);
},
.s_for_of => |__for__| {
return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body);
},
.s_label => |label| {
return shouldKeepStmtInDeadControlFlow(label.stmt);
},
else => {
// Conservatively keep anything we don't explicitly model.
return true;
},
}
}
pub const Equality = struct { equal: bool = false, ok: bool = false };
// Returns "equal, ok". If "ok" is false, then nothing is known about the two
@@ -642,9 +791,10 @@ pub const SideEffects = enum {
.bin_comma => {
return isPrimitiveWithSideEffects(e.right.data);
},
else => {},
}
},
.e_if => {
.e_if => |e| {
return isPrimitiveWithSideEffects(e.yes.data) and isPrimitiveWithSideEffects(e.no.data);
},
else => {},
@@ -1283,6 +1433,14 @@ pub const Parser = struct {
var result: js_ast.Result = undefined;
if (self.p) |p| {
// Consume a leading hashbang comment
var hashbang: string = "";
if (p.lexer.token == .t_hashbang) {
hashbang = p.lexer.identifier;
try p.lexer.next();
}
// Parse the file in the first pass, but do not bind symbols
var opts = ParseStatementOptions{ .is_module_scope = true };
debugl("<p.parseStmtsUpTo>");
@@ -1499,8 +1657,8 @@ const ParseStatementOptions = struct {
var e_missing_data = E.Missing{};
var s_missing = S.Empty{};
var nullExprData = Expr.Data{ .e_missing = &e_missing_data };
var nullStmtData = Stmt.Data{ .s_empty = &s_missing };
var nullExprData = Expr.Data{ .e_missing = e_missing_data };
var nullStmtData = Stmt.Data{ .s_empty = s_missing };
pub const Prefill = struct {
pub const StringLiteral = struct {
pub var Key = [3]u16{ 'k', 'e', 'y' };
@@ -1523,10 +1681,10 @@ pub const Prefill = struct {
pub var BMissing = B{ .b_missing = &BMissing_ };
pub var BMissing_ = B.Missing{};
pub var EMissing = Expr.Data{ .e_missing = &EMissing_ };
pub var EMissing = Expr.Data{ .e_missing = EMissing_ };
pub var EMissing_ = E.Missing{};
pub var SEmpty = Stmt.Data{ .s_empty = &SEmpty_ };
pub var SEmpty = Stmt.Data{ .s_empty = SEmpty_ };
pub var SEmpty_ = S.Empty{};
pub var Filename = Expr.Data{ .e_string = &Prefill.String.Filename };
@@ -4032,7 +4190,7 @@ pub const P = struct {
const name = p.lexer.identifier;
var emiss = E.Missing{};
// Parse either an async function, an async expression, or a normal expression
var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = &emiss } };
var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = emiss } };
if (is_identifier and strings.eqlComptime(p.lexer.raw(), "async")) {
var async_range = p.lexer.range();
try p.lexer.next();
@@ -4589,7 +4747,7 @@ pub const P = struct {
const name = p.lexer.identifier;
const loc = p.lexer.loc();
const e_str = p.lexer.toEString();
const e_str = E.String{ .utf8 = name };
if (!p.lexer.isIdentifierOrKeyword()) {
try p.lexer.expect(.t_identifier);
@@ -7262,7 +7420,7 @@ pub const P = struct {
}
return p.e(E.Array{
.items = items.toOwnedSlice(),
.comma_after_spread = comma_after_spread,
.comma_after_spread = comma_after_spread.toNullable(),
.is_single_line = is_single_line,
}, loc);
},
@@ -7325,7 +7483,7 @@ pub const P = struct {
}
return p.e(E.Object{
.properties = properties.toOwnedSlice(),
.comma_after_spread = comma_after_spread,
.comma_after_spread = comma_after_spread.toNullable(),
.is_single_line = is_single_line,
}, loc);
},
@@ -9707,11 +9865,8 @@ pub const P = struct {
.s_expr => |data| {
p.stmt_expr_value = data.value.data;
data.value = p.visitExpr(data.value);
// TODO:
// if (p.options.mangle_syntax) {
// }
// simplify unused
data.value = SideEffects.simpifyUnusedExpr(p, data.value) orelse data.value.toEmpty();
},
.s_throw => |data| {
data.value = p.visitExpr(data.value);
@@ -10622,9 +10777,10 @@ pub const P = struct {
// Save the current control-flow liveness. This represents if we are
// currently inside an "if (false) { ... }" block.
var old_is_control_flow_dead = p.is_control_flow_dead;
defer p.is_control_flow_dead = old_is_control_flow_dead;
// visit all statements first
var visited = List(Stmt).init(p.allocator);
var visited = try List(Stmt).initCapacity(p.allocator, stmts.items.len);
var before = List(Stmt).init(p.allocator);
var after = List(Stmt).init(p.allocator);
defer before.deinit();
@@ -10657,8 +10813,21 @@ pub const P = struct {
try p.visitAndAppendStmt(list, stmt);
}
p.is_control_flow_dead = old_is_control_flow_dead;
try stmts.resize(visited.items.len + before.items.len + after.items.len);
var visited_count = visited.items.len;
if (p.is_control_flow_dead) {
var end: usize = 0;
for (visited.items) |item, i| {
if (!SideEffects.shouldKeepStmtInDeadControlFlow(item)) {
continue;
}
visited.items[end] = item;
end += 1;
}
visited_count = end;
}
try stmts.resize(visited_count + before.items.len + after.items.len);
var i: usize = 0;
for (before.items) |item| {
@@ -10666,7 +10835,8 @@ pub const P = struct {
i += 1;
}
for (visited.items) |item| {
const visited_slice = visited.items[0..visited_count];
for (visited_slice) |item| {
stmts.items[i] = item;
i += 1;
}

View File

@@ -199,24 +199,54 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
p.js.appendChar(str) catch unreachable;
},
string => {
if (isDebug or isTest) {
if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.append(str) catch unreachable;
},
u8 => {
if (isDebug or isTest) {
if (str == 0) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.appendChar(str) catch unreachable;
},
u16 => {
if (isDebug or isTest) {
if (str == 0) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.appendChar(@intCast(u8, str)) catch unreachable;
},
u21 => {
if (isDebug or isTest) {
if (str == 0) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.appendChar(@intCast(u8, str)) catch unreachable;
},
else => {
if (isDebug or isTest) {
if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.append(@as(string, str)) catch unreachable;
},
}
}
// Append `str` to the output without growing the buffer (capacity must have
// been ensured by the caller). In debug/test builds, panic if the string
// starts with a NUL byte or a literal "\0" escape, which would corrupt output.
// BUG FIX: the guard previously indexed str[0]/str[1] unconditionally, which
// is out-of-bounds for empty or one-byte strings; it now checks the length.
pub fn unsafePrint(p: *Printer, str: string) void {
if (isDebug or isTest) {
if (str.len > 0 and (str[0] == 0 or (str.len > 1 and str[0] == '\\' and str[1] == '0'))) {
Global.panic("Attempted to print null char", .{});
}
}
p.js.appendAssumeCapacity(str);
}

View File

@@ -30,8 +30,12 @@ pub const Kind = enum {
pub const Loc = packed struct {
start: i32 = -1,
/// Convert the -1 sentinel ("no location") into an optional.
/// Read-only, so take a const pointer (consistent with `toUsize` below).
pub fn toNullable(loc: *const Loc) ?Loc {
return if (loc.start == -1) null else loc.*;
}
// TODO: remove this stupidity
pub fn toUsize(self: *Loc) usize {
pub fn toUsize(self: *const Loc) usize {
return @intCast(usize, self.start);
}