This commit is contained in:
Jarred Sumner
2021-04-20 19:09:17 -07:00
parent 4ff1850768
commit 49f4011a66
3 changed files with 342 additions and 73 deletions

View File

@@ -2,7 +2,3 @@ pub const JavascriptString = []u16;
pub const NodeIndex = u32;
pub const NodeIndexNone = 4294967293;
pub const BindingNodeIndex = NodeIndex;
pub const StmtNodeIndex = NodeIndex;
pub const ExprNodeIndex = NodeIndex;

View File

@@ -6,6 +6,10 @@ usingnamespace @import("ast/base.zig");
const ImportRecord = @import("import_record.zig").ImportRecord;
pub const BindingNodeIndex = NodeIndex;
pub const StmtNodeIndex = NodeIndex;
pub const ExprNodeIndex = NodeIndex;
// TODO: figure out if we actually need this
// -- original comment --
// Files are parsed in parallel for speed. We want to allow each parser to
@@ -38,27 +42,6 @@ pub const ImportItemStatus = enum(u8) {
pub const LocRef = struct { loc: logger.Loc, ref: ?Ref };
pub const FnBody = struct {
loc: logger.Loc,
stmts: []StmtNodeIndex,
};
pub const Fn = struct {
name: ?LocRef,
open_parens_loc: logger.Loc,
args: []Arg,
body: FnBody,
arguments_ref: ?Ref,
is_async: bool = false,
is_generator: bool = false,
has_rest_arg: bool = false,
has_if_scope: bool = false,
// This is true if the function is a method
is_unique_formal_parameters: bool = false,
};
pub const Binding = struct {
data: B,
};
@@ -106,15 +89,6 @@ pub const B = union(enum) {
pub const Missing = struct {};
};
pub const Arg = struct {
ts_decorators: ?[]Expr = null,
binding: Binding,
default: ?Expr = null,
// "constructor(public x: boolean) {}"
is_typescript_ctor_field: bool = false,
};
pub const ClauseItem = struct {
alias: string,
alias_loc: logger.Loc,
@@ -159,7 +133,7 @@ pub const G = struct {
key: ExprNodeIndex,
// This is omitted for class fields
value: ?Expr,
value: ?ExprNodeIndex = null,
// This is used when parsing a pattern that uses default values:
//
@@ -177,6 +151,36 @@ pub const G = struct {
is_static: bool = false,
was_shorthand: bool = false,
};
// The body of a function: where it starts and the statements it contains.
// Statements are stored as node indices rather than inline `Stmt` values
// (presumably indices into `AstData.stmt_list` -- TODO confirm).
pub const FnBody = struct {
loc: logger.Loc,
stmts: []StmtNodeIndex,
};
// Shared description of a function (used by `S.Function` via `G.Fn`).
pub const Fn = struct {
// Optional because anonymous functions have no name.
name: ?LocRef,
open_parens_loc: logger.Loc,
// Both `args` and `body` default to null. The parser treats a null `body`
// as a forward declaration of a function.
args: ?[]Arg = null,
body: ?FnBody = null,
arguments_ref: ?Ref,
is_async: bool = false,
is_generator: bool = false,
// Set by the parser when a "..." token is seen in the argument list.
has_rest_arg: bool = false,
has_if_scope: bool = false,
// This is true if the function is a method
is_unique_formal_parameters: bool = false,
};
// A single function argument. The decorators, the binding and the default
// value are all referenced by node index instead of being stored inline.
pub const Arg = struct {
ts_decorators: ?[]ExprNodeIndex = null,
binding: BindingNodeIndex,
// Null when the argument has no default value.
default: ?ExprNodeIndex = null,
// "constructor(public x: boolean) {}"
is_typescript_ctor_field: bool = false,
};
};
pub const Symbol = struct {
@@ -533,8 +537,8 @@ pub const E = struct {
};
pub const Arrow = struct {
args: []Arg,
body: FnBody,
args: []G.Arg,
body: G.FnBody,
is_async: bool = false,
has_rest_arg: bool = false,
@@ -1023,7 +1027,7 @@ pub const S = struct {
};
pub const Function = struct {
func: Fn,
func: G.Fn,
is_export: bool,
};
@@ -1395,6 +1399,81 @@ pub const Dependency = struct {
part_index: u32 = 0,
};
pub const ExprList = std.ArrayList(Expr);
pub const StmtList = std.ArrayList(Stmt);
pub const BindingList = std.ArrayList(Binding);
/// Flat storage for AST nodes. Expressions, statements and bindings each live
/// in their own growable list and are referred to elsewhere by their position
/// in that list (a `NodeIndex`).
pub const AstData = struct {
    expr_list: ExprList,
    stmt_list: StmtList,
    binding_list: BindingList,

    /// Creates an `AstData` whose three lists are empty and use `allocator`.
    pub fn init(allocator: *std.mem.Allocator) AstData {
        return .{
            .expr_list = ExprList.init(allocator),
            .stmt_list = StmtList.init(allocator),
            .binding_list = BindingList.init(allocator),
        };
    }

    /// Releases the memory held by all three lists.
    pub fn deinit(self: *AstData) void {
        self.binding_list.deinit();
        self.stmt_list.deinit();
        self.expr_list.deinit();
    }

    /// Returns a copy of the expression stored at `index`.
    pub fn expr(self: *AstData, index: ExprNodeIndex) Expr {
        const nodes = self.expr_list.items;
        return nodes[index];
    }

    /// Returns a copy of the statement stored at `index`.
    pub fn stmt(self: *AstData, index: StmtNodeIndex) Stmt {
        const nodes = self.stmt_list.items;
        return nodes[index];
    }

    /// Returns a copy of the binding stored at `index`.
    pub fn binding(self: *AstData, index: BindingNodeIndex) Binding {
        const nodes = self.binding_list.items;
        return nodes[index];
    }

    /// Appends `t` to the list matching its type, for callers that do not
    /// need the resulting index. Passing anything other than a `Stmt`,
    /// `Expr` or `Binding` is a compile error.
    pub fn add_(self: *AstData, t: anytype) !void {
        _ = try self.add(t);
    }

    /// Appends `t` to the list matching its type and returns the index the
    /// new node was stored at (the list length before the append). Passing
    /// anything other than a `Stmt`, `Expr` or `Binding` is a compile error.
    pub fn add(self: *AstData, t: anytype) !NodeIndex {
        switch (@TypeOf(t)) {
            Stmt => {
                const index = self.stmt_list.items.len;
                try self.stmt_list.append(t);
                return @intCast(StmtNodeIndex, index);
            },
            Expr => {
                const index = self.expr_list.items.len;
                try self.expr_list.append(t);
                return @intCast(ExprNodeIndex, index);
            },
            Binding => {
                const index = self.binding_list.items.len;
                try self.binding_list.append(t);
                return @intCast(BindingNodeIndex, index);
            },
            else => @compileError("Invalid type passed to AstData.add. Expected Stmt, Expr, or Binding."),
        }
    }
};
// Each file is made up of multiple parts, and each part consists of one or
// more top-level statements. Parts are used for tree shaking and code
// splitting analysis. Individual parts of a file can be discarded by tree
@@ -1403,6 +1482,7 @@ pub const Dependency = struct {
pub const Part = struct {
stmts: []Stmt,
expr: []Expr,
bindings: []Binding,
scopes: []*Scope,
// Each is an index into the file-level import record list
@@ -1502,7 +1582,7 @@ pub const StrictModeKind = enum {
pub const Scope = struct {
kind: Kind = Kind.block,
parent: ?*Scope,
children: []*Scope,
children: std.ArrayList(*Scope),
members: std.StringHashMap(Member),
generated: ?[]Ref = null,
@@ -1537,7 +1617,7 @@ pub const Scope = struct {
pub fn recursiveSetStrictMode(s: *Scope, kind: StrictModeKind) void {
if (s.strict_mode == .sloppy_mode) {
s.strict_mode = kind;
for (s.children) |child| {
for (s.children.items) |child| {
child.recursiveSetStrictMode(kind);
}
}
@@ -1557,3 +1637,4 @@ pub const Scope = struct {
// test "ast" {
// const ast = Ast{};
// }

View File

@@ -6,10 +6,16 @@ const js_ast = @import("js_ast.zig");
const options = @import("options.zig");
const alloc = @import("alloc.zig");
usingnamespace @import("strings.zig");
usingnamespace @import("ast/base.zig");
usingnamespace js_ast.G;
const BindingNodeIndex = js_ast.BindingNodeIndex;
const StmtNodeIndex = js_ast.StmtNodeIndex;
const ExprNodeIndex = js_ast.ExprNodeIndex;
const S = js_ast.S;
const B = js_ast.B;
const G = js_ast.G;
const T = js_lexer.T;
const E = js_ast.E;
const Stmt = js_ast.Stmt;
@@ -56,14 +62,14 @@ const ScopeOrder = struct {
// restored on the call stack around code that parses nested functions and
// arrow expressions.
const FnOrArrowDataParse = struct {
async_range: logger.Range,
arrow_arg_errors: void,
async_range: ?logger.Range = null,
allow_await: bool = false,
allow_yield: bool = false,
allow_super_call: bool = false,
is_top_level: bool = false,
is_constructor: bool = false,
is_type_script_declare: bool = false,
is_typescript_declare: bool = false,
arrow_arg_errors: ?DeferredArrowArgErrors = null,
// In TypeScript, forward declarations of functions have no bodies
allow_missing_body_for_type_script: bool = false,
@@ -246,7 +252,7 @@ const P = struct {
allocated_names: List(string),
latest_arrow_arg_loc: logger.Loc = logger.Loc.Empty,
forbid_suffix_after_as_loc: logger.Loc = logger.Loc.Empty,
current_scope: *js_ast.Scope,
current_scope: ?*js_ast.Scope = null,
scopes_for_current_part: List(*js_ast.Scope),
symbols: List(js_ast.Symbol),
ts_use_counts: List(u32),
@@ -256,6 +262,8 @@ const P = struct {
import_meta_ref: js_ast.Ref = js_ast.Ref.None,
promise_ref: ?js_ast.Ref = null,
data: js_ast.AstData,
injected_define_symbols: []js_ast.Ref,
symbol_uses: SymbolUseMap,
declared_symbols: List(js_ast.DeclaredSymbol),
@@ -491,6 +499,18 @@ const P = struct {
return null;
}
/// Reports the "await"/"yield" errors that were deferred while parsing what
/// turned out to be an arrow function argument list. A range is reported only
/// when its `len` is greater than zero.
///
/// The parser is taken as the first parameter because the body needs its log
/// and source; the previous signature had no parser parameter, so the
/// references to `p` could not resolve. The logging call mirrors
/// `forbidLexicalDecl` (`try p.log.addRangeError(p.source, ...)`), so a
/// logging failure propagates to the caller.
pub fn logArrowArgErrors(p: *P, errors: *DeferredArrowArgErrors) !void {
    if (errors.invalid_expr_await.len > 0) {
        try p.log.addRangeError(p.source, errors.invalid_expr_await, "Cannot use an \"await\" expression here");
    }

    if (errors.invalid_expr_yield.len > 0) {
        try p.log.addRangeError(p.source, errors.invalid_expr_yield, "Cannot use a \"yield\" expression here");
    }
}
pub fn keyNameForError(p: *P, key: js_ast.Expr) string {
switch (key.data) {
js_ast.E.String => {
@@ -509,7 +529,7 @@ const P = struct {
pub fn prepareForVisitPass(p: *P) !void {
try p.pushScopeForVisitPass(js_ast.Scope.Kind.entry, locModuleScope);
p.fn_or_arrow_data_visit.is_outside_fn_or_arrow = true;
p.module_scope = p.current_scope;
p.module_scope = p.current_scope orelse unreachable;
p.has_es_module_syntax = p.es6_import_keyword.len > 0 or p.es6_export_keyword.len > 0 or p.top_level_await_keyword.len > 0;
// ECMAScript modules are always interpreted as strict mode. This has to be
@@ -562,18 +582,18 @@ const P = struct {
try p.scopes_for_current_part.append(order.scope);
}
pub fn pushScopeForParsePass(p: *P, kind: js_ast.Scope.Kind, loc: logger.Loc) !int {
var parent = p.current_scope;
var scope = js_ast.Scope.initPtr(p.allocator);
pub fn pushScopeForParsePass(p: *P, kind: js_ast.Scope.Kind, loc: logger.Loc) !usize {
var parent = p.current_scope orelse unreachable;
var scope = try js_ast.Scope.initPtr(p.allocator);
scope.kind = kind;
scope.parent = parent;
scope.label_ref = null;
if (parent) |_parent| {
try _parent.children.append(scope);
scope.strict_mode = _parent.strict_mode;
}
var i = parent.children.items.len;
try parent.children.append(scope);
scope.strict_mode = parent.strict_mode;
p.current_scope = scope;
// Enforce that scope locations are strictly increasing to help catch bugs
@@ -581,16 +601,16 @@ const P = struct {
if (p.scopes_in_order.items.len > 0) {
const prev_start = p.scopes_in_order.items[p.scopes_in_order.items.len - 1].loc.start;
if (prev_start >= loc.start) {
std.debug.panic("Scope location {i} must be greater than {i}", .{ loc.start, prev_start });
std.debug.panic("Scope location {d} must be greater than {d}", .{ loc.start, prev_start });
}
}
// Copy down function arguments into the function body scope. That way we get
// errors if a statement in the function body tries to re-declare any of the
// arguments.
if (kind == js_ast.ScopeFunctionBody) {
if (scope.parent.kind != js_ast.ScopeFunctionArgs) {
std.debug.panic("Internal error");
if (kind == js_ast.Scope.Kind.function_body) {
if (parent.kind != js_ast.Scope.Kind.function_args) {
std.debug.panic("Internal error", .{});
}
// for name, member := range scope.parent.members {
@@ -602,13 +622,15 @@ const P = struct {
// }
// }
}
return i;
}
// Logs the error "Cannot use a declaration in a single-statement context"
// and propagates any failure from the logger.
// NOTE(review): `loc` is not used; the reported range is the lexer's current
// range (`p.lexer.range()`). Confirm whether it should be derived from `loc`.
pub fn forbidLexicalDecl(p: *P, loc: logger.Loc) !void {
try p.log.addRangeError(p.source, p.lexer.range(), "Cannot use a declaration in a single-statement context");
}
pub fn parseFnStmt(p: *P, loc: logger.Loc, opts: *ParseStatementOptions, asyncRange: ?logger.Range) !js_ast.Stmt {
pub fn parseFnStmt(p: *P, loc: logger.Loc, opts: *ParseStatementOptions, asyncRange: ?logger.Range) !NodeIndex {
const isGenerator = p.lexer.token == T.t_asterisk;
const isAsync = asyncRange != null;
@@ -623,6 +645,8 @@ const P = struct {
.forbid => {
try p.forbidLexicalDecl(loc);
},
// Allow certain function statements in certain single-statement contexts
.allow_fn_inside_if, .allow_fn_inside_label => {
if (opts.is_typescript_declare or isGenerator or isAsync) {
try p.forbidLexicalDecl(loc);
@@ -630,16 +654,156 @@ const P = struct {
},
else => {},
}
var name: ?js_ast.LocRef = null;
var nameText: string = undefined;
// The name is optional for "export default function() {}" pseudo-statements
if (!opts.is_name_optional or p.lexer.token == T.t_identifier) {
var nameLoc = p.lexer.loc();
nameText = p.lexer.identifier;
p.lexer.expect(T.t_identifier);
name = js_ast.LocRef{
.loc = nameLoc,
.ref = null,
};
}
// Even anonymous functions can have TypeScript type parameters
if (p.options.ts) {
p.skipTypescriptTypeParameters();
}
// Introduce a fake block scope for function declarations inside if statements
var ifStmtScopeIndex: usize = 0;
var hasIfScope = opts.lexical_decl == .allow_fn_inside_if;
if (hasIfScope) {
ifStmtScopeIndex = try p.pushScopeForParsePass(js_ast.Scope.Kind.block, loc);
}
var scopeIndex = try p.pushScopeForParsePass(js_ast.Scope.Kind.function_args, p.lexer.loc());
var func = p.parseFn(name, FnOrArrowDataParse{
.async_range = asyncRange,
.allow_await = isAsync,
.allow_yield = isGenerator,
.is_typescript_declare = opts.is_typescript_declare,
// Only allow omitting the body if we're parsing TypeScript
.allow_missing_body_for_type_script = p.options.ts,
});
// Don't output anything if it's just a forward declaration of a function
if (opts.is_typescript_declare or func.body == null) {
p.popAndDiscardScope(scopeIndex);
}
return 0;
}
pub fn parseStmt(p: *P, opts: *ParseStatementOptions) !js_ast.Stmt {
/// Leaves the current scope and erases every trace of it: the parser moves
/// back to the parent scope, `scopes_in_order` is truncated to `scope_index`
/// entries, and the discarded scope is removed from the parent's child list.
///
/// Panics with "Internal error" if the discarded scope is not the parent's
/// last child (including the case where the parent has no children).
pub fn popAndDiscardScope(p: *P, scope_index: usize) void {
    // Move up to the parent scope
    const to_discard = p.current_scope orelse unreachable;
    const parent = to_discard.parent orelse unreachable;
    p.current_scope = parent;

    // Truncate the scope order where we started to pretend we never saw this scope
    p.scopes_in_order.shrinkRetainingCapacity(scope_index);

    // Work on the parent's list through a pointer. `std.ArrayList` is a plain
    // struct, so copying it into a local and popping from the copy would leave
    // the parent's own length untouched and the scope would never be removed.
    const children = &parent.children;

    // Remove the last child from the parent scope
    const count = children.items.len;
    if (count == 0 or children.items[count - 1] != to_discard) {
        std.debug.panic("Internal error", .{});
    }

    _ = children.popOrNull();
}
// Starts parsing a function after its name: builds a `G.Fn` from `name` and
// `opts`, consumes the opening parenthesis, and scans the argument list.
// Argument parsing is not implemented yet (the `parseBinding` call is
// commented out), so the returned `G.Fn` never has `args` or `body` assigned
// and both keep their null defaults.
pub fn parseFn(p: *P, name: ?js_ast.LocRef, opts: FnOrArrowDataParse) G.Fn {
// if data.allowAwait && data.allowYield {
// p.markSyntaxFeature(compat.AsyncGenerator, data.asyncRange)
// }
var func = G.Fn{
.name = name,
.has_rest_arg = false,
.is_async = opts.allow_await,
.is_generator = opts.allow_yield,
.arguments_ref = null,
.open_parens_loc = p.lexer.loc(),
};
p.lexer.expect(T.t_open_paren);
// Await and yield are not allowed in function arguments
// NOTE(review): this copies `opts`, not `p.fn_or_arrow_data_parse`, and the
// copy is never written back -- it looks like the parser's previous state
// was meant to be saved here and restored later; confirm.
var old_fn_or_arrow_data = opts;
p.fn_or_arrow_data_parse.allow_await = false;
p.fn_or_arrow_data_parse.allow_yield = false;
// If "super()" is allowed in the body, it's allowed in the arguments
p.fn_or_arrow_data_parse.allow_super_call = opts.allow_super_call;
// NOTE(review): the loop body only advances the lexer for a TypeScript
// "this" annotation. For any other token nothing is consumed, so the loop
// condition never changes -- the rest of the loop body appears to be missing.
while (p.lexer.token != T.t_close_paren) {
// Skip over "this" type annotations
if (p.options.ts and p.lexer.token == T.t_this) {
p.lexer.next();
if (p.lexer.token == T.t_colon) {
p.lexer.next();
p.skipTypescriptType(js_ast.Op.Level.lowest);
}
if (p.lexer.token != T.t_comma) {
break;
}
p.lexer.next();
continue;
}
}
// Left undefined unless decorators are allowed; it is not read afterwards.
// NOTE(review): `allow_ts_decorators` is not among the `FnOrArrowDataParse`
// fields visible in this change -- confirm it exists.
var ts_decorators: []ExprNodeIndex = undefined;
if (opts.allow_ts_decorators) {
ts_decorators = p.parseTypeScriptDecorators();
}
// A "..." token marks a rest argument; it is only recorded once.
if (!func.has_rest_arg and p.lexer.token == T.t_dot_dot_dot) {
// p.markSyntaxFeature
p.lexer.next();
func.has_rest_arg = true;
}
// These two locals are computed but not used yet.
var is_typescript_ctor_field = false;
var is_identifier = p.lexer.token == T.t_identifier;
// var arg = p.parseBinding();
return func;
}
// pub fn parseBinding(p: *P)
// TODO:
// Placeholder: calls `notimpl()` and then returns an `undefined` slice, so
// the result must not be read until this is implemented.
pub fn parseTypeScriptDecorators(p: *P) []ExprNodeIndex {
notimpl();
return undefined;
}
// TODO:
// Placeholder: only calls `notimpl()`. Neither `p` nor `level` is used and
// no tokens are consumed.
pub fn skipTypescriptType(p: *P, level: js_ast.Op.Level) void {
notimpl();
return undefined;
}
// TODO:
// Placeholder: only calls `notimpl()`. `p` is not used and no tokens are
// consumed.
pub fn skipTypescriptTypeParameters(p: *P) void {
notimpl();
return undefined;
}
pub fn parseStmt(p: *P, opts: *ParseStatementOptions) !NodeIndex {
var loc = p.lexer.loc();
var stmt: js_ast.Stmt = undefined;
switch (p.lexer.token) {
js_lexer.T.t_semicolon => {
p.lexer.next();
return js_ast.Stmt.init(js_ast.S.Empty{}, loc);
return p.data.add(js_ast.Stmt.init(js_ast.S.Empty{}, loc));
},
js_lexer.T.t_export => {
@@ -700,7 +864,7 @@ const P = struct {
p.lexer.expect(T.t_identifier);
p.lexer.expectOrInsertSemicolon();
return Stmt.init(S.TypeScript{}, loc);
return p.data.add(Stmt.init(S.TypeScript{}, loc));
}
if (p.lexer.isContextualKeyword("async")) {
@@ -712,29 +876,29 @@ const P = struct {
p.lexer.expect(T.t_function);
opts.is_export = true;
return try p.parseFnStmt(loc, opts, asyncRange);
return p.parseFnStmt(loc, opts, asyncRange);
}
return stmt;
},
else => {
notimpl();
return @intCast(NodeIndex, 0);
},
}
},
else => {
notimpl();
return @intCast(NodeIndex, 0);
},
}
return stmt;
return @intCast(NodeIndex, 0);
}
pub fn parseStmtsUpTo(p: *P, eend: js_lexer.T, opts: *ParseStatementOptions) ![]js_ast.Stmt {
var stmts = List(js_ast.Stmt).init(p.allocator);
try stmts.ensureCapacity(1);
pub fn parseStmtsUpTo(p: *P, eend: js_lexer.T, opts: *ParseStatementOptions) !void {
var data = p.data;
try data.stmt_list.ensureCapacity(1);
var returnWithoutSemicolonStart: i32 = -1;
opts.lexical_decl = .allow_all;
@@ -743,7 +907,7 @@ const P = struct {
run: while (true) {
if (p.lexer.comments_to_preserve_before) |comments| {
for (comments) |comment| {
try stmts.append(Stmt.init(S.Comment{
try data.add_(Stmt.init(S.Comment{
.text = comment.text,
}, p.lexer.loc()));
}
@@ -753,10 +917,10 @@ const P = struct {
break :run;
}
var stmt = p.parseStmt(opts);
}
const node_index = p.parseStmt(opts) catch break :run;
return stmts.toOwnedSlice();
var stmt = p.data.stmt(node_index);
}
}
pub fn init(allocator: *std.mem.Allocator, log: logger.Log, source: logger.Source, lexer: js_lexer.Lexer, opts: Parser.Options) !*P {
@@ -782,11 +946,39 @@ const P = struct {
parser.options = opts;
parser.source = source;
parser.lexer = lexer;
parser.data = js_ast.AstData.init(allocator);
return parser;
}
};
// The "await" and "yield" expressions are never allowed in argument lists but
// may or may not be allowed otherwise depending on the details of the enclosing
// function or module. This needs to be handled when parsing an arrow function
// argument list because we don't know if these expressions are not allowed until
// we reach the "=>" token (or discover the absence of one).
//
// Specifically, for await:
//
// // This is ok
// async function foo() { (x = await y) }
//
// // This is an error
// async function foo() { (x = await y) => {} }
//
// And for yield:
//
// // This is ok
// function* foo() { (x = yield y) }
//
// // This is an error
// function* foo() { (x = yield y) => {} }
//
// Source ranges of "await" / "yield" expressions whose error is deferred.
// Both fields default to `logger.Range.None`; `logArrowArgErrors` reports a
// field only when its `len` is greater than zero.
const DeferredArrowArgErrors = struct {
// Reported as: Cannot use an "await" expression here
invalid_expr_await: logger.Range = logger.Range.None,
// Reported as: Cannot use a "yield" expression here
invalid_expr_yield: logger.Range = logger.Range.None,
};
test "js_parser.init" {
try alloc.setup(std.heap.page_allocator);