diff options
author | 2021-04-28 21:58:02 -0700 | |
---|---|---|
committer | 2021-04-28 21:58:02 -0700 | |
commit | cabe773a4f0a12e411f9f3c9698da6bbd90ec474 (patch) | |
tree | c20cfd9ba22c4ca999c850edb04012d94d72ccb5 /src | |
parent | 435a6e9b187168d869024d1002951e4bfa76333a (diff) | |
download | bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.tar.gz bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.tar.zst bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.zip |
wip
Former-commit-id: b37acf309c8f42d49dc47eea446f89a3dbe9f6e2
Diffstat (limited to 'src')
-rw-r--r-- | src/ast/base.zig | 10 | ||||
-rw-r--r-- | src/defines.zig | 155 | ||||
-rw-r--r-- | src/global_name_parser.zig | 0 | ||||
-rw-r--r-- | src/js_ast.zig | 93 | ||||
-rw-r--r-- | src/js_lexer.zig | 2368 | ||||
-rw-r--r-- | src/js_lexer_tables.zig | 26 | ||||
-rw-r--r-- | src/js_parser.zig | 1314 | ||||
-rw-r--r-- | src/js_printer.zig | 18 | ||||
-rw-r--r-- | src/json_parser.zig | 27 | ||||
-rw-r--r-- | src/logger.zig | 30 | ||||
-rw-r--r-- | src/main.zig | 4 | ||||
-rw-r--r-- | src/string_immutable.zig | 35 |
12 files changed, 2614 insertions, 1466 deletions
diff --git a/src/ast/base.zig b/src/ast/base.zig index f941e3745..1b2e3f087 100644 --- a/src/ast/base.zig +++ b/src/ast/base.zig @@ -27,7 +27,7 @@ pub const Ref = packed struct { inner_index: Int = 0, // 2 bits of padding for whatever is the parent - pub const Int = u31; + pub const Int = u30; pub const None = Ref{ .inner_index = std.math.maxInt(Ref.Int) }; pub fn isNull(self: *const Ref) bool { return self.source_index == std.math.maxInt(Ref.Int) and self.inner_index == std.math.maxInt(Ref.Int); @@ -55,3 +55,11 @@ pub const RequireOrImportMeta = struct { exports_ref: Ref = Ref.None, is_wrapper_async: bool = false, }; +pub fn debug(comptime fmt: []const u8, args: anytype) callconv(.Inline) void { + // std.debug.print(fmt, args); +} +pub fn debugl( + comptime fmt: []const u8, +) callconv(.Inline) void { + // std.debug.print("{s}\n", .{fmt}); +} diff --git a/src/defines.zig b/src/defines.zig index 6021df57f..a27956839 100644 --- a/src/defines.zig +++ b/src/defines.zig @@ -1,12 +1,159 @@ const std = @import("std"); const js_ast = @import("./js_ast.zig"); +const alloc = @import("alloc.zig"); + +usingnamespace @import("strings.zig"); const GlobalDefinesKey = @import("./defines-table.zig").GlobalDefinesKey; -pub const defaultIdentifierDefines = comptime {}; +const Globals = struct { + pub const Undefined = js_ast.E.Undefined{}; + pub const UndefinedPtr = &Globals.Undefined; + + pub const NaN = js_ast.E.Number{ .value = std.math.nan(f64) }; + pub const NanPtr = &Globals.NaN; + + pub const Infinity = js_ast.E.Number{ .value = std.math.inf(f64) }; + pub const InfinityPtr = &Globals.Infinity; +}; + +pub const DefineData = struct { + value: js_ast.Expr.Data = DefaultValue, + + // True if accessing this value is known to not have any side effects. For + // example, a bare reference to "Object.create" can be removed because it + // does not have any observable side effects. 
+ can_be_removed_if_unused: bool = false, + + // True if a call to this value is known to not have any side effects. For + // example, a bare call to "Object()" can be removed because it does not + // have any observable side effects. + call_can_be_unwrapped_if_unused: bool = false, + + pub const DefaultValue = js_ast.Expr.Data{ .e_undefined = Globals.UndefinedPtr }; + + // All the globals have the same behavior. + // So we can create just one struct for it. + pub const GlobalDefineData = DefineData{}; + + pub fn merge(a: DefineData, b: DefineData) DefineData { + return DefineData{ + .value = b.value, + .can_be_removed_if_unsued = a.can_be_removed_if_unsued, + .call_can_be_unwrapped_if_unused = a.call_can_be_unwrapped_if_unused, + }; + } +}; + +fn arePartsEqual(a: []string, b: []string) bool { + if (a.len != b.len) { + return false; + } + + var i: usize = 0; + while (i < a.len) : (i += 1) { + if (!strings.eql(a[i], b[i])) { + return false; + } + } + + return true; +} + +pub const IdentifierDefine = DefineData; + +pub const DotDefine = struct { + parts: []string, + data: DefineData, +}; + +pub const Define = struct { + identifiers: std.StringHashMapUnmanaged(IdentifierDefine), + dots: std.StringHashMapUnmanaged([]DotDefine), + allocator: *std.mem.Allocator, + + pub fn init(allocator: *std.mem.Allocator, user_defines: std.StringHashMap(DefineData)) !*@This() { + var define = try allocator.create(Define); + define.allocator = allocator; + try define.identifiers.ensureCapacity(allocator, 641); + try define.dots.ensureCapacity(allocator, 38); + + // Step 1. 
Load the globals into the hash tables + for (GlobalDefinesKey) |global| { + if (global.len == 1) { + // TODO: when https://github.com/ziglang/zig/pull/8596 is merged, switch to putAssumeCapacityNoClobber + define.identifiers.putAssumeCapacity(global[0], IdentifierDefine.GlobalDefineData); + } else { + // TODO: when https://github.com/ziglang/zig/pull/8596 is merged, switch to putAssumeCapacityNoClobber + define.dots.putAssumeCapacity(global[global.len - 1], DotDefine{ + .parts = global[0 .. global.len - 1], + .data = DefineData.GlobalDefineData, + }); + } + } + + // Step 2. Swap in certain literal values because those can be constant folded + define.identifiers.putAssumeCapacity("undefined", DefineData{ + .value = js_ast.Expr.Data{ .e_undefined = Globals.UndefinedPtr }, + }); + define.identifiers.putAssumeCapacity("NaN", DefineData{ + .value = js_ast.Expr.Data{ .e_number = Globals.NanPtr }, + }); + define.identifiers.putAssumeCapacity("Infinity", DefineData{ + .value = js_ast.Expr.Data{ .e_number = Globals.InfinityPtr }, + }); + + // Step 3. Load user data into hash tables + // At this stage, user data has already been validated. + if (user_defines.count() > 0) { + var iter = user_defines.iterator(); + while (iter.next()) |user_define| { + // If it has a dot, then it's a DotDefine. + // e.g. process.env.NODE_ENV + if (strings.lastIndexOfChar(user_define.key, '.')) |last_dot| { + const tail = user_define.key[last_dot + 1 .. 
user_define.key.len]; + const parts = std.mem.tokenize(user_define.key[0..last_dot], ".").rest(); + var didFind = false; + var initial_values = &([_]DotDefine{}); + + // "NODE_ENV" + if (define.dots.getEntry()) |entry| { + for (entry.value) |*part| { + // ["process", "env"] == ["process", "env"] + if (arePartsEqual(part.parts, parts)) { + part.data = part.data.merge(user_define.value); + didFind = true; + break; + } + } + + initial_values = entry.value; + } + + if (!didFind) { + var list = try std.ArrayList(DotDefine).initCapacity(allocator, initial_values.len + 1); + if (initial_values.len > 0) { + list.appendSliceAssumeCapacity(initial_values); + } -pub const IdentifierDefine = struct {}; + list.appendAssumeCapacity(DotDefine{ + .data = user_define.value, + // TODO: do we need to allocate this? + .parts = parts, + }); + try define.dots.put(allocator, tail, list.toOwnedSlice()); + } + } else { + // IS_BROWSER + try define.identifiers.put(user_define.key, user_define.value); + } + } + } -pub const DotDefine = struct {}; + return define; + } +}; -pub const Defines = struct {}; +test "defines" { + +} diff --git a/src/global_name_parser.zig b/src/global_name_parser.zig new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/global_name_parser.zig diff --git a/src/js_ast.zig b/src/js_ast.zig index 9602f42ab..4701020fd 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -93,6 +93,73 @@ pub const Binding = struct { loc: logger.Loc, data: B, + pub fn ToExpr(comptime expr_type: type, comptime func_type: anytype) type { + const ExprType = expr_type; + return struct { + context: *ExprType, + allocator: *std.mem.Allocator, + pub const Context = @This(); + + pub fn wrapIdentifier(ctx: *const Context, loc: logger.Loc, ref: Ref) Expr { + return func_type(ctx.context, loc, ref); + } + + pub fn init(context: *ExprType) Context { + return Context{ .context = context, .allocator = context.allocator }; + } + }; + } + + pub fn toExpr(binding: *const Binding, wrapper: 
anytype) Expr { + var loc = binding.loc; + + switch (binding.data) { + .b_missing => { + return Expr.alloc(wrapper.allocator, E.Missing{}, loc); + }, + + .b_identifier => |b| { + return wrapper.wrapIdentifier(loc, b.ref); + }, + .b_array => |b| { + var exprs = wrapper.allocator.alloc(Expr, b.items.len) catch unreachable; + var i: usize = 0; + while (i < exprs.len) : (i += 1) { + const item = b.items[i]; + exprs[i] = convert: { + const expr = toExpr(&item.binding, wrapper); + if (b.has_spread and i == exprs.len - 1) { + break :convert Expr.alloc(wrapper.allocator, E.Spread{ .value = expr }, expr.loc); + } else if (item.default_value) |default| { + break :convert Expr.assign(expr, default, wrapper.allocator); + } else { + break :convert expr; + } + }; + } + + return Expr.alloc(wrapper.allocator, E.Array{ .items = exprs, .is_single_line = b.is_single_line }, loc); + }, + .b_object => |b| { + var properties = wrapper.allocator.alloc(G.Property, b.properties.len) catch unreachable; + var i: usize = 0; + while (i < properties.len) : (i += 1) { + const item = b.properties[i]; + properties[i] = G.Property{ + .flags = item.flags, + .kind = if (item.flags.is_spread) G.Property.Kind.spread else G.Property.Kind.normal, + .value = toExpr(&item.value, wrapper), + .initializer = item.default_value, + }; + } + return Expr.alloc(wrapper.allocator, E.Object{ .properties = properties, .is_single_line = b.is_single_line }, loc); + }, + else => { + std.debug.panic("Interanl error", .{}); + }, + } + } + pub const Tag = packed enum { b_identifier, b_array, @@ -511,7 +578,7 @@ pub const Symbol = struct { // single inner array, so you can join the maps together by just make a // single outer array containing all of the inner arrays. See the comment on // "Ref" for more detail. 
- symbols_for_source: [][]Symbol = undefined, + symbols_for_source: [][]Symbol, pub fn get(self: *Map, ref: Ref) ?Symbol { return self.symbols_for_source[ref.source_index][ref.inner_index]; @@ -522,6 +589,10 @@ pub const Symbol = struct { return Map{ .symbols_for_source = symbols_for_source }; } + pub fn initList(list: [][]Symbol) Map { + return Map{ .symbols_for_source = list }; + } + pub fn follow(symbols: *Map, ref: Ref) Ref { if (symbols.get(ref)) |*symbol| { const link = symbol.link orelse return ref; @@ -1974,12 +2045,11 @@ pub const Expr = struct { } }; - pub fn assign(a: *Expr, b: *Expr, allocator: *std.mem.Allocator) Expr { - std.debug.assert(a != b); + pub fn assign(a: Expr, b: Expr, allocator: *std.mem.Allocator) Expr { return alloc(allocator, E.Binary{ .op = .bin_assign, - .left = a.*, - .right = b.*, + .left = a, + .right = b, }, a.loc); } pub fn at(expr: *Expr, t: anytype, allocator: *std.mem.allocator) callconv(.Inline) Expr { @@ -2061,15 +2131,13 @@ pub const Expr = struct { return null; } - pub fn assignStmt(a: *Expr, b: *Expr, allocator: *std.mem.Allocator) Stmt { + pub fn assignStmt(a: Expr, b: Expr, allocator: *std.mem.Allocator) Stmt { return Stmt.alloc( allocator, S.SExpr{ - .op = .assign, - .left = a, - .right = b, + .value = Expr.assign(a, b, allocator), }, - loc, + a.loc, ); } @@ -2883,7 +2951,9 @@ pub const Scope = struct { } }; -pub fn printmem(comptime format: string, args: anytype) void {} +pub fn printmem(comptime format: string, args: anytype) void { + // std.debug.print(format, args); +} test "Binding.init" { var binding = Binding.alloc( @@ -3073,3 +3143,4 @@ test "Expr.init" { // Stmt | 192 // STry | 384 // -- ESBuild bit sizes + diff --git a/src/js_lexer.zig b/src/js_lexer.zig index bd018a254..a6625b3ff 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -8,8 +8,6 @@ const js_ast = @import("js_ast.zig"); usingnamespace @import("ast/base.zig"); usingnamespace @import("strings.zig"); -const _f = @import("./test/fixtures.zig"); - 
const unicode = std.unicode; const Source = logger.Source; @@ -21,1264 +19,1365 @@ pub const jsxEntity = tables.jsxEntity; pub const StrictModeReservedWords = tables.StrictModeReservedWords; pub const PropertyModifierKeyword = tables.PropertyModifierKeyword; pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword; +pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier; pub const JSONOptions = struct { allow_comments: bool = false, allow_trailing_commas: bool = false, }; -pub fn NewLexerType(comptime jsonOptions: ?JSONOptions) type { - return struct { - // pub const Error = error{ - // UnexpectedToken, - // EndOfFile, - // }; - - // err: ?@This().Error, - log: *logger.Log, - source: logger.Source, - current: usize = 0, - start: usize = 0, - end: usize = 0, - did_panic: bool = false, - approximate_newline_count: i32 = 0, - legacy_octal_loc: logger.Loc = logger.Loc.Empty, - previous_backslash_quote_in_jsx: logger.Range = logger.Range.None, - token: T = T.t_end_of_file, - has_newline_before: bool = false, - has_pure_comment_before: bool = false, - preserve_all_comments_before: bool = false, - is_legacy_octal_literal: bool = false, - comments_to_preserve_before: std.ArrayList(js_ast.G.Comment), - all_original_comments: ?[]js_ast.G.Comment = null, - code_point: CodePoint = -1, - string_literal: JavascriptString, - identifier: []const u8 = "", - jsx_factory_pragma_comment: ?js_ast.Span = null, - jsx_fragment_pragma_comment: ?js_ast.Span = null, - source_mapping_url: ?js_ast.Span = null, - number: f64 = 0.0, - rescan_close_brace_as_template_token: bool = false, - for_global_name: bool = false, - prev_error_loc: logger.Loc = logger.Loc.Empty, - allocator: *std.mem.Allocator, - - pub fn loc(self: *@This()) logger.Loc { - return logger.usize2Loc(self.start); +pub const Lexer = struct { + // pub const Error = error{ + // UnexpectedToken, + // EndOfFile, + // }; + + // err: ?@This().Error, + log: *logger.Log, + json_options: ?JSONOptions = 
null, + for_global_name: bool = false, + source: logger.Source, + current: usize = 0, + start: usize = 0, + end: usize = 0, + did_panic: bool = false, + approximate_newline_count: i32 = 0, + legacy_octal_loc: logger.Loc = logger.Loc.Empty, + previous_backslash_quote_in_jsx: logger.Range = logger.Range.None, + token: T = T.t_end_of_file, + has_newline_before: bool = false, + has_pure_comment_before: bool = false, + preserve_all_comments_before: bool = false, + is_legacy_octal_literal: bool = false, + comments_to_preserve_before: std.ArrayList(js_ast.G.Comment), + all_original_comments: ?[]js_ast.G.Comment = null, + code_point: CodePoint = -1, + string_literal: JavascriptString, + identifier: []const u8 = "", + jsx_factory_pragma_comment: ?js_ast.Span = null, + jsx_fragment_pragma_comment: ?js_ast.Span = null, + source_mapping_url: ?js_ast.Span = null, + number: f64 = 0.0, + rescan_close_brace_as_template_token: bool = false, + prev_error_loc: logger.Loc = logger.Loc.Empty, + allocator: *std.mem.Allocator, + + pub fn loc(self: *@This()) logger.Loc { + return logger.usize2Loc(self.start); + } + + fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 { + if (it.current >= it.source.contents.len) { + return null; } - fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 { - if (it.current >= it.source.contents.len) { - return null; - } + const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable; + it.end = it.current; + it.current += cp_len; - const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable; - it.end = it.current; - it.current += cp_len; + return it.source.contents[it.current - cp_len .. it.current]; + } - return it.source.contents[it.current - cp_len .. 
it.current]; - } + pub fn syntaxError(self: *@This()) void { + self.addError(self.start, "Syntax Error!!", .{}, true); + } - pub fn syntaxError(self: *@This()) void { - self.addError(self.start, "Syntax Error!!", .{}, true); - } + pub fn addDefaultError(self: *@This(), msg: []const u8) void { + self.addError(self.start, "{s}", .{msg}, true); + } - pub fn addDefaultError(self: *@This(), msg: []const u8) void { - self.addError(self.start, "{s}", .{msg}, true); + pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void { + var __loc = logger.usize2Loc(_loc); + if (__loc.eql(self.prev_error_loc)) { + return; } - pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void { - var __loc = logger.usize2Loc(_loc); - if (__loc.eql(self.prev_error_loc)) { - return; - } + self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable; + self.prev_error_loc = __loc; + var msg = self.log.msgs.items[self.log.msgs.items.len - 1]; + msg.formatNoWriter(std.debug.panic); + } - self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable; - self.prev_error_loc = __loc; - var msg = self.log.msgs.items[self.log.msgs.items.len - 1]; - msg.formatNoWriter(std.debug.panic); + pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void { + if (self.prev_error_loc.eql(r.loc)) { + return; } - pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void { - if (self.prev_error_loc.eql(r.loc)) { - return; - } + const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable; + var msg = self.log.addRangeError(self.source, r, errorMessage); + self.prev_error_loc = r.loc; - const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable; - var msg = self.log.addRangeError(self.source, r, 
errorMessage); - self.prev_error_loc = r.loc; + if (panic) { + var fixedBuffer = [_]u8{0} ** 8096; + var stream = std.io.fixedBufferStream(&fixedBuffer); + const writer = stream.writer(); + self.log.print(writer) catch unreachable; - if (panic) { - self.doPanic(errorMessage); - } + std.debug.panic("{s}", .{fixedBuffer[0..stream.pos]}); } + } - fn doPanic(self: *@This(), content: []const u8) void { - if (@import("builtin").is_test) { - self.did_panic = true; - } else { - std.debug.panic("{s}", .{content}); - } + fn doPanic(self: *@This(), content: []const u8) void { + if (@import("builtin").is_test) { + self.did_panic = true; + } else { + std.debug.panic("{s}", .{content}); } + } - pub fn codePointEql(self: *@This(), a: u8) bool { - return @intCast(CodePoint, a) == self.code_point; - } + pub fn codePointEql(self: *@This(), a: u8) bool { + return @intCast(CodePoint, a) == self.code_point; + } - fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint { - const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1); + fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint { + const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1); - switch (slice.len) { - 1 => return @as(CodePoint, slice[0]), - 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable), - 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable), - 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable), - else => unreachable, - } + switch (slice.len) { + 1 => return @as(CodePoint, slice[0]), + 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable), + 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable), + 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable), + else => unreachable, } + } - /// Look ahead at the next n codepoints without advancing the iterator. - /// If fewer than n codepoints are available, then return the remainder of the string. 
- fn peek(it: *@This(), n: usize) []const u8 { - const original_i = it.current; - defer it.current = original_i; - - var end_ix = original_i; - var found: usize = 0; - while (found < n) : (found += 1) { - const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..]; - end_ix += next_codepoint.len; - } - - return it.source.contents[original_i..end_ix]; + /// Look ahead at the next n codepoints without advancing the iterator. + /// If fewer than n codepoints are available, then return the remainder of the string. + fn peek(it: *@This(), n: usize) []const u8 { + const original_i = it.current; + defer it.current = original_i; + + var end_ix = original_i; + var found: usize = 0; + while (found < n) : (found += 1) { + const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..]; + end_ix += next_codepoint.len; } - pub fn isIdentifierOrKeyword(lexer: @This()) bool { - return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier); - } + return it.source.contents[original_i..end_ix]; + } - fn parseStringLiteral(lexer: *@This()) void { - var quote: CodePoint = lexer.code_point; - var needs_slow_path = false; - var suffixLen: usize = 1; + pub fn isIdentifierOrKeyword(lexer: @This()) bool { + return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier); + } - if (quote != '`') { - lexer.token = T.t_string_literal; - } else if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_tail; - } else { - lexer.token = T.t_no_substitution_template_literal; - } - lexer.step(); + fn parseStringLiteral(lexer: *@This()) void { + var quote: CodePoint = lexer.code_point; + var needs_slow_path = false; + var suffixLen: usize = 1; - stringLiteral: while (true) { - switch (lexer.code_point) { - '\\' => { - needs_slow_path = true; - lexer.step(); + if (quote != '`') { + lexer.token = T.t_string_literal; + } else if (lexer.rescan_close_brace_as_template_token) { + lexer.token = T.t_template_tail; + } else { + 
lexer.token = T.t_no_substitution_template_literal; + } + lexer.step(); - // Handle Windows CRLF - if (lexer.code_point == '\r' and jsonOptions != null) { + stringLiteral: while (true) { + switch (lexer.code_point) { + '\\' => { + needs_slow_path = true; + lexer.step(); + + // Handle Windows CRLF + if (lexer.code_point == '\r' and lexer.json_options != null) { + lexer.step(); + if (lexer.code_point == '\n') { lexer.step(); - if (lexer.code_point == '\n') { - lexer.step(); - } - continue :stringLiteral; } - }, - // This indicates the end of the file + continue :stringLiteral; + } + }, + // This indicates the end of the file - -1 => { - lexer.addDefaultError("Unterminated string literal"); - }, + -1 => { + lexer.addDefaultError("Unterminated string literal"); + }, - '\r' => { - if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); - } + '\r' => { + if (quote != '`') { + lexer.addDefaultError("Unterminated string literal"); + } - // Template literals require newline normalization - needs_slow_path = true; - }, + // Template literals require newline normalization + needs_slow_path = true; + }, - '\n' => { - if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); - } - }, + '\n' => { + if (quote != '`') { + lexer.addDefaultError("Unterminated string literal"); + } + }, - '$' => { - if (quote == '`') { + '$' => { + if (quote == '`') { + lexer.step(); + if (lexer.code_point == '{') { + suffixLen = 2; lexer.step(); - if (lexer.code_point == '{') { - suffixLen = 2; - lexer.step(); - if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_middle; - } else { - lexer.token = T.t_template_head; - } - break :stringLiteral; + if (lexer.rescan_close_brace_as_template_token) { + lexer.token = T.t_template_middle; + } else { + lexer.token = T.t_template_head; } - continue :stringLiteral; - } - }, - - else => { - if (quote == lexer.code_point) { - lexer.step(); break :stringLiteral; } - // Non-ASCII strings need the 
slow path - if (lexer.code_point >= 0x80) { - needs_slow_path = true; - } else if (jsonOptions != null and lexer.code_point < 0x20) { - lexer.syntaxError(); - } - }, - } - lexer.step(); - } + continue :stringLiteral; + } + }, - const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen]; - if (needs_slow_path) { - lexer.string_literal = lexer.stringToUTF16(text); - } else { - lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable; - var i: usize = 0; - for (text) |byte| { - lexer.string_literal[i] = byte; - i += 1; - } + else => { + if (quote == lexer.code_point) { + lexer.step(); + break :stringLiteral; + } + // Non-ASCII strings need the slow path + if (lexer.code_point >= 0x80) { + needs_slow_path = true; + } else if (lexer.json_options != null and lexer.code_point < 0x20) { + lexer.syntaxError(); + } + }, } + lexer.step(); + } - if (quote == '\'' and jsonOptions != null) { - lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); + const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen]; + if (needs_slow_path) { + lexer.string_literal = lexer.stringToUTF16(text); + } else { + lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable; + var i: usize = 0; + for (text) |byte| { + lexer.string_literal[i] = byte; + i += 1; } - // for (text) - // // if (needs_slow_path) { - // // // Slow path - - // // // lexer.string_literal = lexer.(lexer.start + 1, text); - // // } else { - // // // Fast path - - // // } } - fn step(lexer: *@This()) void { - lexer.code_point = lexer.nextCodepoint(); - - // Track the approximate number of newlines in the file so we can preallocate - // the line offset table in the printer for source maps. The line offset table - // is the #1 highest allocation in the heap profile, so this is worth doing. - // This count is approximate because it handles "\n" and "\r\n" (the common - // cases) but not "\r" or "\u2028" or "\u2029". 
Getting this wrong is harmless - // because it's only a preallocation. The array will just grow if it's too small. - if (lexer.code_point == '\n') { - lexer.approximate_newline_count += 1; - } + if (quote == '\'' and lexer.json_options != null) { + lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); } + // for (text) + // // if (needs_slow_path) { + // // // Slow path - pub fn expect(self: *@This(), comptime token: T) void { - if (self.token != token) { - self.expected(token); - } + // // // lexer.string_literal = lexer.(lexer.start + 1, text); + // // } else { + // // // Fast path - self.next(); - } + // // } + } - pub fn expectOrInsertSemicolon(lexer: *@This()) void { - if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and - lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file)) - { - lexer.expect(T.t_semicolon); - } + fn step(lexer: *@This()) void { + lexer.code_point = lexer.nextCodepoint(); + + // Track the approximate number of newlines in the file so we can preallocate + // the line offset table in the printer for source maps. The line offset table + // is the #1 highest allocation in the heap profile, so this is worth doing. + // This count is approximate because it handles "\n" and "\r\n" (the common + // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless + // because it's only a preallocation. The array will just grow if it's too small. 
+ if (lexer.code_point == '\n') { + lexer.approximate_newline_count += 1; } + } - pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void { - self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true); + pub fn expect(self: *@This(), comptime token: T) void { + if (self.token != token) { + self.expected(token); } - pub fn scanIdentifierWithEscapes(self: *@This()) void { - self.addUnsupportedSyntaxError("escape sequence"); - return; + self.next(); + } + + pub fn expectOrInsertSemicolon(lexer: *@This()) void { + if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and + lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file)) + { + lexer.expect(T.t_semicolon); } + } + + pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void { + self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true); + } + + pub fn scanIdentifierWithEscapes(self: *@This()) void { + self.addUnsupportedSyntaxError("escape sequence"); + return; + } - pub fn debugInfo(self: *@This()) void { - if (self.log.errors > 0) { - const stderr = std.io.getStdErr().writer(); - self.log.print(stderr) catch unreachable; + pub fn debugInfo(self: *@This()) void { + if (self.log.errors > 0) { + const stderr = std.io.getStdErr().writer(); + self.log.print(stderr) catch unreachable; + } else { + if (self.token == T.t_identifier or self.token == T.t_string_literal) { + std.debug.print(" {s} ", .{self.raw()}); } else { - if (self.token == T.t_identifier or self.token == T.t_string_literal) { - std.debug.print(" {s} ", .{self.raw()}); - } else { - std.debug.print(" <{s}> ", .{tokenToString.get(self.token)}); - } + std.debug.print(" <{s}> ", .{tokenToString.get(self.token)}); } } + } - pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void { - if (!self.isContextualKeyword(keyword)) { - self.addError(self.start, "\"{s}\"", .{keyword}, true); - } - self.next(); + pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) 
void { + if (!self.isContextualKeyword(keyword)) { + self.addError(self.start, "\"{s}\"", .{keyword}, true); } + self.next(); + } - pub fn next(lexer: *@This()) void { - lexer.has_newline_before = lexer.end == 0; + pub fn next(lexer: *@This()) void { + lexer.has_newline_before = lexer.end == 0; - lex: while (true) { - lexer.start = lexer.end; - lexer.token = T.t_end_of_file; + lex: while (true) { + lexer.start = lexer.end; + lexer.token = T.t_end_of_file; - switch (lexer.code_point) { - -1 => { - lexer.token = T.t_end_of_file; - }, + switch (lexer.code_point) { + -1 => { + lexer.token = T.t_end_of_file; + }, - '#' => { - if (lexer.start == 0 and lexer.source.contents[1] == '!') { - lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); - return; - } + '#' => { + if (lexer.start == 0 and lexer.source.contents[1] == '!') { + lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); + return; + } - lexer.step(); - if (!isIdentifierStart(lexer.code_point)) { - lexer.syntaxError(); - } - lexer.step(); + lexer.step(); + if (!isIdentifierStart(lexer.code_point)) { + lexer.syntaxError(); + } + lexer.step(); - if (isIdentifierStart(lexer.code_point)) { - lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { - lexer.step(); - } - if (lexer.code_point == '\\') { - lexer.scanIdentifierWithEscapes(); - lexer.token = T.t_private_identifier; - // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); - } else { - lexer.token = T.t_private_identifier; - lexer.identifier = lexer.raw(); - } - break; - } - }, - '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); - lexer.has_newline_before = true; - continue; - }, - '\t', ' ' => { - lexer.step(); - continue; - }, - '(' => { + if (isIdentifierStart(lexer.code_point)) { lexer.step(); - lexer.token = T.t_open_paren; - }, - ')' => { - lexer.step(); - lexer.token = T.t_close_paren; - }, - '[' => { - lexer.step(); - lexer.token = T.t_open_bracket; - }, - ']' => { - lexer.step(); - 
lexer.token = T.t_close_bracket; - }, - '{' => { - lexer.step(); - lexer.token = T.t_open_brace; - }, - '}' => { - lexer.step(); - lexer.token = T.t_close_brace; - }, - ',' => { - lexer.step(); - lexer.token = T.t_comma; - }, - ':' => { - lexer.step(); - lexer.token = T.t_colon; - }, - ';' => { - lexer.step(); - lexer.token = T.t_semicolon; - }, - '@' => { - lexer.step(); - lexer.token = T.t_at; - }, - '~' => { - lexer.step(); - lexer.token = T.t_tilde; - }, - '?' => { - // '?' or '?.' or '??' or '??=' - lexer.step(); - switch (lexer.code_point) { - '?' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_question_question_equals; - }, - else => { - lexer.token = T.t_question_question; - }, - } - }, - - '.' => { - lexer.token = T.t_question; - const current = lexer.current; - const contents = lexer.source.contents; - - // Lookahead to disambiguate with 'a?.1:b' - if (current < contents.len) { - const c = contents[current]; - if (c < '0' or c > '9') { - lexer.step(); - lexer.token = T.t_question_dot; - } - } - }, - else => { - lexer.token = T.t_question; - }, + while (isIdentifierContinue(lexer.code_point)) { + lexer.step(); } - }, - '%' => { - // '%' or '%=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_percent_equals; - }, - - else => { - lexer.token = T.t_percent; - }, + if (lexer.code_point == '\\') { + lexer.scanIdentifierWithEscapes(); + lexer.token = T.t_private_identifier; + // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); + } else { + lexer.token = T.t_private_identifier; + lexer.identifier = lexer.raw(); } - }, + break; + } + }, + '\r', '\n', 0x2028, 0x2029 => { + lexer.step(); + lexer.has_newline_before = true; + continue; + }, + '\t', ' ' => { + lexer.step(); + continue; + }, + '(' => { + lexer.step(); + lexer.token = T.t_open_paren; + }, + ')' => { + lexer.step(); + lexer.token = T.t_close_paren; + }, + '[' => { + 
lexer.step(); + lexer.token = T.t_open_bracket; + }, + ']' => { + lexer.step(); + lexer.token = T.t_close_bracket; + }, + '{' => { + lexer.step(); + lexer.token = T.t_open_brace; + }, + '}' => { + lexer.step(); + lexer.token = T.t_close_brace; + }, + ',' => { + lexer.step(); + lexer.token = T.t_comma; + }, + ':' => { + lexer.step(); + lexer.token = T.t_colon; + }, + ';' => { + lexer.step(); + lexer.token = T.t_semicolon; + }, + '@' => { + lexer.step(); + lexer.token = T.t_at; + }, + '~' => { + lexer.step(); + lexer.token = T.t_tilde; + }, + '?' => { + // '?' or '?.' or '??' or '??=' + lexer.step(); + switch (lexer.code_point) { + '?' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_question_question_equals; + }, + else => { + lexer.token = T.t_question_question; + }, + } + }, - '&' => { - // '&' or '&=' or '&&' or '&&=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_ampersand_equals; - }, + '.' 
=> { + lexer.token = T.t_question; + const current = lexer.current; + const contents = lexer.source.contents; - '&' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_ampersand_ampersand_equals; - }, - - else => { - lexer.token = T.t_ampersand_ampersand; - }, + // Lookahead to disambiguate with 'a?.1:b' + if (current < contents.len) { + const c = contents[current]; + if (c < '0' or c > '9') { + lexer.step(); + lexer.token = T.t_question_dot; } - }, - else => { - lexer.token = T.t_ampersand; - }, - } - }, + } + }, + else => { + lexer.token = T.t_question; + }, + } + }, + '%' => { + // '%' or '%=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_percent_equals; + }, - '|' => { + else => { + lexer.token = T.t_percent; + }, + } + }, - // '|' or '|=' or '||' or '||=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_bar_equals; - }, - '|' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_bar_bar_equals; - }, + '&' => { + // '&' or '&=' or '&&' or '&&=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_ampersand_equals; + }, - else => { - lexer.token = T.t_bar_bar; - }, - } - }, - else => { - lexer.token = T.t_bar; - }, - } - }, + '&' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_ampersand_ampersand_equals; + }, - '^' => { - // '^' or '^=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_caret_equals; - }, + else => { + lexer.token = T.t_ampersand_ampersand; + }, + } + }, + else => { + lexer.token = T.t_ampersand; + }, + } + }, - else => { - lexer.token = T.t_caret; - }, - } - }, + '|' => { - '+' => { - // '+' or '+=' or '++' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_plus_equals; - }, + // '|' or '|=' or 
'||' or '||=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_bar_equals; + }, + '|' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_bar_bar_equals; + }, - '+' => { - lexer.step(); - lexer.token = T.t_plus_plus; - }, + else => { + lexer.token = T.t_bar_bar; + }, + } + }, + else => { + lexer.token = T.t_bar; + }, + } + }, - else => { - lexer.token = T.t_plus; - }, - } - }, + '^' => { + // '^' or '^=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_caret_equals; + }, - '-' => { - // '+' or '+=' or '++' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_minus_equals; - }, + else => { + lexer.token = T.t_caret; + }, + } + }, - '-' => { - lexer.step(); + '+' => { + // '+' or '+=' or '++' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_plus_equals; + }, - if (lexer.code_point == '>' and lexer.has_newline_before) { - lexer.step(); - lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable; - - singleLineHTMLCloseComment: while (true) { - switch (lexer.code_point) { - '\r', '\n', 0x2028, 0x2029 => { - break :singleLineHTMLCloseComment; - }, - -1 => { - break :singleLineHTMLCloseComment; - }, - else => {}, - } - lexer.step(); - } - continue; - } + '+' => { + lexer.step(); + lexer.token = T.t_plus_plus; + }, - lexer.token = T.t_minus_minus; - }, + else => { + lexer.token = T.t_plus; + }, + } + }, - else => { - lexer.token = T.t_minus; - }, - } - }, + '-' => { + // '+' or '+=' or '++' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_minus_equals; + }, - '*' => { - // '*' or '*=' or '**' or '**=' + '-' => { + lexer.step(); - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = 
.t_asterisk_equals; - }, - '*' => { + if (lexer.code_point == '>' and lexer.has_newline_before) { lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = .t_asterisk_asterisk_equals; - }, - else => { - lexer.token = .t_asterisk_asterisk; - }, - } - }, - else => { - lexer.token = .t_asterisk; - }, - } - }, - '/' => { - // '/' or '/=' or '//' or '/* ... */' - lexer.step(); - // TODO: forGlobalName + lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable; - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = .t_slash_equals; - }, - '/' => { - lexer.step(); - singleLineComment: while (true) { - lexer.step(); + singleLineHTMLCloseComment: while (true) { switch (lexer.code_point) { '\r', '\n', 0x2028, 0x2029 => { - break :singleLineComment; + break :singleLineHTMLCloseComment; }, -1 => { - break :singleLineComment; + break :singleLineHTMLCloseComment; }, else => {}, } + lexer.step(); } - - if (jsonOptions) |json| { - if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); - return; - } - } - lexer.scanCommentText(); - continue; - }, - '*' => { - lexer.step(); - - multiLineComment: while (true) { - switch (lexer.code_point) { - '*' => { - lexer.step(); - if (lexer.code_point == '/') { - lexer.step(); - break :multiLineComment; - } - }, - '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); - lexer.has_newline_before = true; - }, - -1 => { - lexer.start = lexer.end; - lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true); - }, - else => { - lexer.step(); - }, - } - } - if (jsonOptions) |json| { - if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); - return; - } - } - lexer.scanCommentText(); continue; - }, - else => { - lexer.token = .t_slash; - }, - } - }, - - '=' => { - // '=' or '=>' or '==' 
or '===' - lexer.step(); - switch (lexer.code_point) { - '>' => { - lexer.step(); - lexer.token = T.t_equals_greater_than; - }, + } - '=' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_equals_equals_equals; - }, + lexer.token = T.t_minus_minus; + }, - else => { - lexer.token = T.t_equals_equals; - }, - } - }, + else => { + lexer.token = T.t_minus; + }, + } + }, - else => { - lexer.token = T.t_equals; - }, - } - }, + '*' => { + // '*' or '*=' or '**' or '**=' - '<' => { - // '<' or '<<' or '<=' or '<<=' or '<!--' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_less_than_equals; - }, + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = .t_asterisk_equals; + }, + '*' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = .t_asterisk_asterisk_equals; + }, + else => { + lexer.token = .t_asterisk_asterisk; + }, + } + }, + else => { + lexer.token = .t_asterisk; + }, + } + }, + '/' => { + // '/' or '/=' or '//' or '/* ... */' + lexer.step(); - '<' => { + if (lexer.for_global_name) { + lexer.token = .t_slash; + break; + } + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = .t_slash_equals; + }, + '/' => { + lexer.step(); + singleLineComment: while (true) { lexer.step(); switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_less_than_less_than_equals; + '\r', '\n', 0x2028, 0x2029 => { + break :singleLineComment; }, - - else => { - lexer.token = T.t_less_than_less_than; + -1 => { + break :singleLineComment; }, + else => {}, } - }, - // Handle legacy HTML-style comments - '!' 
=> { - if (std.mem.eql(u8, lexer.peek("--".len), "--")) { - lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!"); + } + + if (lexer.json_options) |json| { + if (!json.allow_comments) { + lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); return; } + } + lexer.scanCommentText(); + continue; + }, + '*' => { + lexer.step(); - lexer.token = T.t_less_than; - }, - - else => { - lexer.token = T.t_less_than; - }, - } - }, - - '>' => { - // '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>=' - lexer.step(); - - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_greater_than_equals; - }, - '>' => { - lexer.step(); + multiLineComment: while (true) { switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_greater_than_greater_than_equals; - }, - '>' => { + '*' => { lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_greater_than_greater_than_greater_than_equals; - }, - else => { - lexer.token = T.t_greater_than_greater_than_greater_than; - }, + if (lexer.code_point == '/') { + lexer.step(); + break :multiLineComment; } }, - else => { - lexer.token = T.t_greater_than_greater_than; - }, - } - }, - else => { - lexer.token = T.t_greater_than; - }, - } - }, - - '!' => { - // '!' 
or '!=' or '!==' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { + '\r', '\n', 0x2028, 0x2029 => { lexer.step(); - lexer.token = T.t_exclamation_equals_equals; + lexer.has_newline_before = true; + }, + -1 => { + lexer.start = lexer.end; + lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true); }, - else => { - lexer.token = T.t_exclamation_equals; + lexer.step(); }, } - }, - else => { - lexer.token = T.t_exclamation; - }, - } - }, + } + if (lexer.json_options) |json| { + if (!json.allow_comments) { + lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); + return; + } + } + lexer.scanCommentText(); + continue; + }, + else => { + lexer.token = .t_slash; + }, + } + }, - '\'', '"', '`' => { - lexer.parseStringLiteral(); - }, + '=' => { + // '=' or '=>' or '==' or '===' + lexer.step(); + switch (lexer.code_point) { + '>' => { + lexer.step(); + lexer.token = T.t_equals_greater_than; + }, - '_', '$', 'a'...'z', 'A'...'Z' => { - lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { + '=' => { lexer.step(); - } + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_equals_equals_equals; + }, - if (lexer.code_point == '\\') { - lexer.scanIdentifierWithEscapes(); - } else { - const contents = lexer.raw(); - lexer.identifier = contents; - lexer.token = Keywords.get(contents) orelse T.t_identifier; - } - }, + else => { + lexer.token = T.t_equals_equals; + }, + } + }, - '\\' => { - // TODO: normal - lexer.scanIdentifierWithEscapes(); - }, + else => { + lexer.token = T.t_equals; + }, + } + }, - '.', '0'...'9' => { - lexer.parseNumericLiteralOrDot(); - }, + '<' => { + // '<' or '<<' or '<=' or '<<=' or '<!--' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_less_than_equals; + }, - else => { - // Check for unusual whitespace characters - if (isWhitespace(lexer.code_point)) { + 
'<' => { lexer.step(); - continue; - } + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_less_than_less_than_equals; + }, + + else => { + lexer.token = T.t_less_than_less_than; + }, + } + }, + // Handle legacy HTML-style comments + '!' => { + if (std.mem.eql(u8, lexer.peek("--".len), "--")) { + lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!"); + return; + } - if (isIdentifierStart(lexer.code_point)) { + lexer.token = T.t_less_than; + }, + + else => { + lexer.token = T.t_less_than; + }, + } + }, + + '>' => { + // '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>=' + lexer.step(); + + switch (lexer.code_point) { + '=' => { lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { - lexer.step(); + lexer.token = T.t_greater_than_equals; + }, + '>' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_greater_than_greater_than_equals; + }, + '>' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_greater_than_greater_than_greater_than_equals; + }, + else => { + lexer.token = T.t_greater_than_greater_than_greater_than; + }, + } + }, + else => { + lexer.token = T.t_greater_than_greater_than; + }, } - if (lexer.code_point == '\\') { + }, + else => { + lexer.token = T.t_greater_than; + }, + } + }, - // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); - } else { - lexer.token = T.t_identifier; - lexer.identifier = lexer.raw(); + '!' => { + // '!' 
or '!=' or '!==' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_exclamation_equals_equals; + }, + + else => { + lexer.token = T.t_exclamation_equals; + }, } - break; - } + }, + else => { + lexer.token = T.t_exclamation; + }, + } + }, - lexer.end = lexer.current; - lexer.token = T.t_syntax_error; - }, - } + '\'', '"', '`' => { + lexer.parseStringLiteral(); + }, - return; - } - } + '_', '$', 'a'...'z', 'A'...'Z' => { + lexer.step(); + while (isIdentifierContinue(lexer.code_point)) { + lexer.step(); + } - pub fn expected(self: *@This(), token: T) void { - if (tokenToString.get(token).len > 0) { - self.expectedString(tokenToString.get(token)); - } else { - self.unexpected(); - } - } + if (lexer.code_point == '\\') { + lexer.scanIdentifierWithEscapes(); + } else { + const contents = lexer.raw(); + lexer.identifier = contents; + lexer.token = Keywords.get(contents) orelse T.t_identifier; + } + }, - pub fn unexpected(lexer: *@This()) void { - var found: string = undefined; - if (lexer.start == lexer.source.contents.len) { - found = "end of file"; - } else { - found = lexer.raw(); + '\\' => { + // TODO: normal + lexer.scanIdentifierWithEscapes(); + }, + + '.', '0'...'9' => { + lexer.parseNumericLiteralOrDot(); + }, + + else => { + // Check for unusual whitespace characters + if (isWhitespace(lexer.code_point)) { + lexer.step(); + continue; + } + + if (isIdentifierStart(lexer.code_point)) { + lexer.step(); + while (isIdentifierContinue(lexer.code_point)) { + lexer.step(); + } + if (lexer.code_point == '\\') { + + // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); + } else { + lexer.token = T.t_identifier; + lexer.identifier = lexer.raw(); + } + break; + } + + lexer.end = lexer.current; + lexer.token = T.t_syntax_error; + }, } - lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true); + return; } + } - pub fn raw(self: 
*@This()) []const u8 { - return self.source.contents[self.start..self.end]; + pub fn expected(self: *@This(), token: T) void { + if (tokenToString.get(token).len > 0) { + self.expectedString(tokenToString.get(token)); + } else { + self.unexpected(); } + } - pub fn isContextualKeyword(self: *@This(), comptime keyword: string) bool { - return self.token == .t_identifier and strings.eql(self.raw(), keyword); + pub fn unexpected(lexer: *@This()) void { + var found: string = undefined; + if (lexer.start == lexer.source.contents.len) { + found = "end of file"; + } else { + found = lexer.raw(); } - pub fn expectedString(self: *@This(), text: string) void { - var found = text; - if (self.source.contents.len == self.start) { - found = "end of file"; - } - self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true); + lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true); + } + + pub fn raw(self: *@This()) []const u8 { + return self.source.contents[self.start..self.end]; + } + + pub fn isContextualKeyword(self: *@This(), comptime keyword: string) bool { + return self.token == .t_identifier and strings.eql(self.raw(), keyword); + } + + pub fn expectedString(self: *@This(), text: string) void { + var found = self.raw(); + if (self.source.contents.len == self.start) { + found = "end of file"; } + self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true); + } + + pub fn scanCommentText(lexer: *@This()) void { + var text = lexer.source.contents[lexer.start..lexer.end]; + const has_preserve_annotation = text.len > 2 and text[2] == '!'; + const is_multiline_comment = text[1] == '*'; - pub fn scanCommentText(lexer: *@This()) void { - var text = lexer.source.contents[lexer.start..lexer.end]; - const has_preserve_annotation = text.len > 2 and text[2] == '!'; - const is_multiline_comment = text[1] == '*'; + // Omit the trailing "*/" from the checks below + var endCommentText = text.len; + if (is_multiline_comment) { 
+ endCommentText -= 2; + } - // Omit the trailing "*/" from the checks below - var endCommentText = text.len; + if (has_preserve_annotation or lexer.preserve_all_comments_before) { if (is_multiline_comment) { - endCommentText -= 2; + // text = lexer.removeMultilineCommentIndent(lexer.source.contents[0..lexer.start], text); } - if (has_preserve_annotation or lexer.preserve_all_comments_before) { - if (is_multiline_comment) { - // text = lexer.removeMultilineCommentIndent(lexer.source.contents[0..lexer.start], text); - } - - lexer.comments_to_preserve_before.append(js_ast.G.Comment{ - .text = text, - .loc = lexer.loc(), - }) catch unreachable; - } + lexer.comments_to_preserve_before.append(js_ast.G.Comment{ + .text = text, + .loc = lexer.loc(), + }) catch unreachable; } + } - // TODO: implement this - // it's too complicated to handle all the edgecases right now given the state of Zig's standard library - pub fn removeMultilineCommentIndent(lexer: *@This(), _prefix: string, text: string) string { - return text; - } + // TODO: implement this + // it's too complicated to handle all the edgecases right now given the state of Zig's standard library + pub fn removeMultilineCommentIndent(lexer: *@This(), _prefix: string, text: string) string { + return text; + } - pub fn range(self: *@This()) logger.Range { - return logger.Range{ - .loc = logger.usize2Loc(self.start), - .len = std.math.lossyCast(i32, self.end - self.start), - }; - } + pub fn range(self: *@This()) logger.Range { + return logger.Range{ + .loc = logger.usize2Loc(self.start), + .len = std.math.lossyCast(i32, self.end - self.start), + }; + } - pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() { - var empty_string_literal: JavascriptString = undefined; - var lex = @This(){ - .log = log, - .source = source.*, - .string_literal = empty_string_literal, - .prev_error_loc = logger.Loc.Empty, - .allocator = allocator, - .comments_to_preserve_before = 
std.ArrayList(js_ast.G.Comment).init(allocator), - }; - lex.step(); - lex.next(); - - return lex; - } + pub fn initGlobalName(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() { + var empty_string_literal: JavascriptString = undefined; + var lex = @This(){ + .log = log, + .source = source.*, + .string_literal = empty_string_literal, + .prev_error_loc = logger.Loc.Empty, + .allocator = allocator, + .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), + .for_global_name = true, + }; + lex.step(); + lex.next(); - pub fn scanRegExp(lexer: *@This()) void { - while (true) { - switch (lexer.code_point) { - '/' => { - lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { - switch (lexer.code_point) { - 'g', 'i', 'm', 's', 'u', 'y' => { - lexer.step(); - }, - else => { - lexer.syntaxError(); - }, - } - } - }, - '[' => { - lexer.step(); - while (lexer.code_point != ']') { - lexer.scanRegExpValidateAndStep(); - } - lexer.step(); - }, - else => { - lexer.scanRegExpValidateAndStep(); - }, - } - } - } + return lex; + } - // TODO: use wtf-8 encoding. 
- pub fn stringToUTF16(lexer: *@This(), str: string) JavascriptString { - var buf: JavascriptString = lexer.allocator.alloc(u16, std.mem.len(str)) catch unreachable; - var i: usize = 0; - // theres prob a faster/better way - for (str) |char| { - buf[i] = char; - i += 1; - } + pub fn initTSConfig(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() { + var empty_string_literal: JavascriptString = undefined; + var lex = @This(){ + .log = log, + .source = source.*, + .string_literal = empty_string_literal, + .prev_error_loc = logger.Loc.Empty, + .allocator = allocator, + .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), + .json_options = JSONOptions{ + .allow_comments = true, + .allow_trailing_commas = true, + }, + }; + lex.step(); + lex.next(); - return buf; - } + return lex; + } - // TODO: use wtf-8 encoding. - pub fn utf16ToStringWithValidation(lexer: *@This(), js: JavascriptString) !string { - return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js); - } + pub fn initJSON(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() { + var empty_string_literal: JavascriptString = undefined; + var lex = @This(){ + .log = log, + .source = source.*, + .string_literal = empty_string_literal, + .prev_error_loc = logger.Loc.Empty, + .allocator = allocator, + .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), + .json_options = JSONOptions{ + .allow_comments = false, + .allow_trailing_commas = false, + }, + }; + lex.step(); + lex.next(); - // TODO: use wtf-8 encoding. 
- pub fn utf16ToString(lexer: *@This(), js: JavascriptString) string { - return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable; - } + return lex; + } - pub fn nextInsideJSXElement() void { - std.debug.panic("JSX not implemented yet.", .{}); - } + pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() { + var empty_string_literal: JavascriptString = undefined; + var lex = @This(){ + .log = log, + .source = source.*, + .string_literal = empty_string_literal, + .prev_error_loc = logger.Loc.Empty, + .allocator = allocator, + .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), + }; + lex.step(); + lex.next(); - fn scanRegExpValidateAndStep(lexer: *@This()) void { - if (lexer.code_point == '\\') { - lexer.step(); - } + return lex; + } + pub fn scanRegExp(lexer: *@This()) void { + while (true) { switch (lexer.code_point) { - '\r', '\n', 0x2028, 0x2029 => { - // Newlines aren't allowed in regular expressions - lexer.syntaxError(); + '/' => { + lexer.step(); + while (isIdentifierContinue(lexer.code_point)) { + switch (lexer.code_point) { + 'g', 'i', 'm', 's', 'u', 'y' => { + lexer.step(); + }, + else => { + lexer.syntaxError(); + }, + } + } }, - -1 => { // EOF - lexer.syntaxError(); + '[' => { + lexer.step(); + while (lexer.code_point != ']') { + lexer.scanRegExpValidateAndStep(); + } + lexer.step(); }, else => { - lexer.step(); + lexer.scanRegExpValidateAndStep(); }, } } + } - pub fn rescanCloseBraceAsTemplateToken(lexer: *@This()) void { - if (lexer.token != .t_close_brace) { - lexer.expected(.t_close_brace); - } + // TODO: use wtf-8 encoding. 
+ pub fn stringToUTF16(lexer: *@This(), str: string) JavascriptString { + var buf: JavascriptString = lexer.allocator.alloc(u16, std.mem.len(str)) catch unreachable; + var i: usize = 0; + // theres prob a faster/better way + for (str) |char| { + buf[i] = char; + i += 1; + } - lexer.rescan_close_brace_as_template_token = true; - lexer.code_point = '`'; - lexer.current = lexer.end; - lexer.end -= 1; - lexer.next(); - lexer.rescan_close_brace_as_template_token = false; + return buf; + } + + // TODO: use wtf-8 encoding. + pub fn utf16ToStringWithValidation(lexer: *@This(), js: JavascriptString) !string { + return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js); + } + + // TODO: use wtf-8 encoding. + pub fn utf16ToString(lexer: *@This(), js: JavascriptString) string { + return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable; + } + + pub fn nextInsideJSXElement() void { + std.debug.panic("JSX not implemented yet.", .{}); + } + + fn scanRegExpValidateAndStep(lexer: *@This()) void { + if (lexer.code_point == '\\') { + lexer.step(); } - pub fn rawTemplateContents(lexer: *@This()) string { - var text: string = undefined; + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + // Newlines aren't allowed in regular expressions + lexer.syntaxError(); + }, + -1 => { // EOF + lexer.syntaxError(); + }, + else => { + lexer.step(); + }, + } + } - switch (lexer.token) { - .t_no_substitution_template_literal, .t_template_tail => { - text = lexer.source.contents[lexer.start + 1 .. lexer.end - 1]; - }, - .t_template_middle, .t_template_head => { - text = lexer.source.contents[lexer.start + 1 .. 
lexer.end - 2]; - }, - else => {}, - } + pub fn rescanCloseBraceAsTemplateToken(lexer: *@This()) void { + if (lexer.token != .t_close_brace) { + lexer.expected(.t_close_brace); + } - if (strings.indexOfChar(text, '\r') == null) { - return text; - } + lexer.rescan_close_brace_as_template_token = true; + lexer.code_point = '`'; + lexer.current = lexer.end; + lexer.end -= 1; + lexer.next(); + lexer.rescan_close_brace_as_template_token = false; + } - // From the specification: - // - // 11.8.6.1 Static Semantics: TV and TRV - // - // TV excludes the code units of LineContinuation while TRV includes - // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to - // <LF> for both TV and TRV. An explicit EscapeSequence is needed to - // include a <CR> or <CR><LF> sequence. - var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable; - var end: usize = 0; - var i: usize = 0; - var c: u8 = '0'; - while (i < bytes.list.items.len) { - c = bytes.list.items[i]; - i += 1; + pub fn rawTemplateContents(lexer: *@This()) string { + var text: string = undefined; + + switch (lexer.token) { + .t_no_substitution_template_literal, .t_template_tail => { + text = lexer.source.contents[lexer.start + 1 .. lexer.end - 1]; + }, + .t_template_middle, .t_template_head => { + text = lexer.source.contents[lexer.start + 1 .. lexer.end - 2]; + }, + else => {}, + } - if (c == '\r') { - // Convert '\r\n' into '\n' - if (i < bytes.list.items.len and bytes.list.items[i] == '\n') { - i += 1; - } + if (strings.indexOfChar(text, '\r') == null) { + return text; + } - // Convert '\r' into '\n' - c = '\n'; + // From the specification: + // + // 11.8.6.1 Static Semantics: TV and TRV + // + // TV excludes the code units of LineContinuation while TRV includes + // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to + // <LF> for both TV and TRV. An explicit EscapeSequence is needed to + // include a <CR> or <CR><LF> sequence. 
+ var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable; + var end: usize = 0; + var i: usize = 0; + var c: u8 = '0'; + while (i < bytes.list.items.len) { + c = bytes.list.items[i]; + i += 1; + + if (c == '\r') { + // Convert '\r\n' into '\n' + if (i < bytes.list.items.len and bytes.list.items[i] == '\n') { + i += 1; } - bytes.list.items[end] = c; - end += 1; + // Convert '\r' into '\n' + c = '\n'; } - return bytes.toOwnedSliceLength(end + 1); + bytes.list.items[end] = c; + end += 1; } - fn parseNumericLiteralOrDot(lexer: *@This()) void { - // Number or dot; - var first = lexer.code_point; - lexer.step(); - - // Dot without a digit after it; - if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) { - // "..." - if ((lexer.code_point == '.' and - lexer.current < lexer.source.contents.len) and - lexer.source.contents[lexer.current] == '.') - { - lexer.step(); - lexer.step(); - lexer.token = T.t_dot_dot_dot; - return; - } + return bytes.toOwnedSliceLength(end + 1); + } - // "." - lexer.token = T.t_dot; + fn parseNumericLiteralOrDot(lexer: *@This()) void { + // Number or dot; + var first = lexer.code_point; + lexer.step(); + + // Dot without a digit after it; + if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) { + // "..." + if ((lexer.code_point == '.' and + lexer.current < lexer.source.contents.len) and + lexer.source.contents[lexer.current] == '.') + { + lexer.step(); + lexer.step(); + lexer.token = T.t_dot_dot_dot; return; } - var underscoreCount: usize = 0; - var lastUnderscoreEnd: usize = 0; - var hasDotOrExponent = first == '.'; - var base: f32 = 0.0; - lexer.is_legacy_octal_literal = false; + // "." 
+ lexer.token = T.t_dot; + return; + } + + var underscoreCount: usize = 0; + var lastUnderscoreEnd: usize = 0; + var hasDotOrExponent = first == '.'; + var base: f32 = 0.0; + lexer.is_legacy_octal_literal = false; + + // Assume this is a number, but potentially change to a bigint later; + lexer.token = T.t_numeric_literal; + + // Check for binary, octal, or hexadecimal literal; + if (first == '0') { + switch (lexer.code_point) { + 'b', 'B' => { + base = 2; + }, + + 'o', 'O' => { + base = 8; + }, + + 'x', 'X' => { + base = 16; + }, + + '0'...'7', '_' => { + base = 8; + lexer.is_legacy_octal_literal = true; + }, + else => {}, + } + } - // Assume this is a number, but potentially change to a bigint later; - lexer.token = T.t_numeric_literal; + if (base != 0) { + // Integer literal; + var isFirst = true; + var isInvalidLegacyOctalLiteral = false; + lexer.number = 0; + if (!lexer.is_legacy_octal_literal) { + lexer.step(); + } - // Check for binary, octal, or hexadecimal literal; - if (first == '0') { + integerLiteral: while (true) { switch (lexer.code_point) { - 'b', 'B' => { - base = 2; + '_' => { + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.syntaxError(); + } + + // The first digit must exist; + if (isFirst or lexer.is_legacy_octal_literal) { + lexer.syntaxError(); + } + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; }, - 'o', 'O' => { - base = 8; + '0', '1' => { + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); }, - 'x', 'X' => { - base = 16; + '2', '3', '4', '5', '6', '7' => { + if (base == 2) { + lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); + }, + '8', '9' => { + if (lexer.is_legacy_octal_literal) { + isInvalidLegacyOctalLiteral = true; + } else if (base < 10) { + lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); + }, + 'A', 'B', 'C', 'D', 'E', 'F' => { + 
if (base != 16) { + lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A'); }, - '0'...'7', '_' => { - base = 8; - lexer.is_legacy_octal_literal = true; + 'a', 'b', 'c', 'd', 'e', 'f' => { + if (base != 16) { + lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'a'); }, - else => {}, - } - } + else => { + // The first digit must exist; + if (isFirst) { + lexer.syntaxError(); + } - if (base != 0) { - // Integer literal; - var isFirst = true; - var isInvalidLegacyOctalLiteral = false; - lexer.number = 0; - if (!lexer.is_legacy_octal_literal) { - lexer.step(); + break :integerLiteral; + }, } - integerLiteral: while (true) { - switch (lexer.code_point) { - '_' => { - // Cannot have multiple underscores in a row; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.syntaxError(); - } - - // The first digit must exist; - if (isFirst or lexer.is_legacy_octal_literal) { - lexer.syntaxError(); - } - - lastUnderscoreEnd = lexer.end; - underscoreCount += 1; - }, + lexer.step(); + isFirst = false; + } - '0', '1' => { - lexer.number = lexer.number * base + float64(lexer.code_point - '0'); - }, + var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent; - '2', '3', '4', '5', '6', '7' => { - if (base == 2) { - lexer.syntaxError(); - } - lexer.number = lexer.number * base + float64(lexer.code_point - '0'); - }, - '8', '9' => { - if (lexer.is_legacy_octal_literal) { - isInvalidLegacyOctalLiteral = true; - } else if (base < 10) { - lexer.syntaxError(); - } - lexer.number = lexer.number * base + float64(lexer.code_point - '0'); - }, - 'A', 'B', 'C', 'D', 'E', 'F' => { - if (base != 16) { - lexer.syntaxError(); - } - lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A'); - }, + // Slow path: do we need to re-scan the input as text? 
+ if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) { + var text = lexer.raw(); - 'a', 'b', 'c', 'd', 'e', 'f' => { - if (base != 16) { - lexer.syntaxError(); - } - lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'a'); - }, - else => { - // The first digit must exist; - if (isFirst) { - lexer.syntaxError(); - } + // Can't use a leading zero for bigint literals; + if (isBigIntegerLiteral and lexer.is_legacy_octal_literal) { + lexer.syntaxError(); + } - break :integerLiteral; - }, + // Filter out underscores; + if (underscoreCount > 0) { + var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable; + var i: usize = 0; + for (text) |char| { + if (char != '_') { + bytes[i] = char; + i += 1; + } } - - lexer.step(); - isFirst = false; } - var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent; + // Store bigints as text to avoid precision loss; + if (isBigIntegerLiteral) { + lexer.identifier = text; + } else if (isInvalidLegacyOctalLiteral) { + if (std.fmt.parseFloat(f64, text)) |num| { + lexer.number = num; + } else |err| { + lexer.addError(lexer.start, "Invalid number {s}", .{text}, true); + } + } + } + } else { + // Floating-point literal; + var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9'); - // Slow path: do we need to re-scan the input as text? 
- if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) { - var text = lexer.raw(); + // Initial digits; + while (true) { + if (lexer.code_point < '0' or lexer.code_point > '9') { + if (lexer.code_point != '_') { + break; + } - // Can't use a leading zero for bigint literals; - if (isBigIntegerLiteral and lexer.is_legacy_octal_literal) { + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { lexer.syntaxError(); } - // Filter out underscores; - if (underscoreCount > 0) { - var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable; - var i: usize = 0; - for (text) |char| { - if (char != '_') { - bytes[i] = char; - i += 1; - } - } + // The specification forbids underscores in this case; + if (isInvalidLegacyOctalLiteral) { + lexer.syntaxError(); } - // Store bigints as text to avoid precision loss; - if (isBigIntegerLiteral) { - lexer.identifier = text; - } else if (isInvalidLegacyOctalLiteral) { - if (std.fmt.parseFloat(f64, text)) |num| { - lexer.number = num; - } else |err| { - lexer.addError(lexer.start, "Invalid number {s}", .{text}, true); - } - } + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; } - } else { - // Floating-point literal; - var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9'); + lexer.step(); + } - // Initial digits; + // Fractional digits; + if (first != '.' 
and lexer.code_point == '.') { + // An underscore must not come last; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.end -= 1; + lexer.syntaxError(); + } + + hasDotOrExponent = true; + lexer.step(); + if (lexer.code_point == '_') { + lexer.syntaxError(); + } while (true) { if (lexer.code_point < '0' or lexer.code_point > '9') { if (lexer.code_point != '_') { @@ -1290,151 +1389,110 @@ pub fn NewLexerType(comptime jsonOptions: ?JSONOptions) type { lexer.syntaxError(); } - // The specification forbids underscores in this case; - if (isInvalidLegacyOctalLiteral) { - lexer.syntaxError(); - } - lastUnderscoreEnd = lexer.end; underscoreCount += 1; } lexer.step(); } + } - // Fractional digits; - if (first != '.' and lexer.code_point == '.') { - // An underscore must not come last; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.end -= 1; - lexer.syntaxError(); - } - - hasDotOrExponent = true; - lexer.step(); - if (lexer.code_point == '_') { - lexer.syntaxError(); - } - while (true) { - if (lexer.code_point < '0' or lexer.code_point > '9') { - if (lexer.code_point != '_') { - break; - } - - // Cannot have multiple underscores in a row; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.syntaxError(); - } - - lastUnderscoreEnd = lexer.end; - underscoreCount += 1; - } - lexer.step(); - } + // Exponent; + if (lexer.code_point == 'e' or lexer.code_point == 'E') { + // An underscore must not come last; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.end -= 1; + lexer.syntaxError(); } - // Exponent; - if (lexer.code_point == 'e' or lexer.code_point == 'E') { - // An underscore must not come last; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.end -= 1; - lexer.syntaxError(); - } - - hasDotOrExponent = true; + hasDotOrExponent = true; + lexer.step(); + if (lexer.code_point == '+' or lexer.code_point == '-') { lexer.step(); - if 
(lexer.code_point == '+' or lexer.code_point == '-') { - lexer.step(); - } + } + if (lexer.code_point < '0' or lexer.code_point > '9') { + lexer.syntaxError(); + } + while (true) { if (lexer.code_point < '0' or lexer.code_point > '9') { - lexer.syntaxError(); - } - while (true) { - if (lexer.code_point < '0' or lexer.code_point > '9') { - if (lexer.code_point != '_') { - break; - } - - // Cannot have multiple underscores in a row; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.syntaxError(); - } + if (lexer.code_point != '_') { + break; + } - lastUnderscoreEnd = lexer.end; - underscoreCount += 1; + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.syntaxError(); } - lexer.step(); + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; } + lexer.step(); } + } - // Take a slice of the text to parse; - var text = lexer.raw(); + // Take a slice of the text to parse; + var text = lexer.raw(); - // Filter out underscores; - if (underscoreCount > 0) { - var i: usize = 0; - if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| { - for (text) |char| { - if (char != '_') { - bytes[i] = char; - i += 1; - } + // Filter out underscores; + if (underscoreCount > 0) { + var i: usize = 0; + if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| { + for (text) |char| { + if (char != '_') { + bytes[i] = char; + i += 1; } - text = bytes; - } else |err| { - lexer.addError(lexer.start, "Out of Memory Wah Wah Wah", .{}, true); - return; } + text = bytes; + } else |err| { + lexer.addError(lexer.start, "Out of Memory Wah Wah Wah", .{}, true); + return; } + } - if (lexer.code_point == 'n' and !hasDotOrExponent) { - // The only bigint literal that can start with 0 is "0n" - if (text.len > 1 and first == '0') { - lexer.syntaxError(); - } - - // Store bigints as text to avoid precision loss; - lexer.identifier = text; - } else if (!hasDotOrExponent and lexer.end 
- lexer.start < 10) { - // Parse a 32-bit integer (very fast path); - var number: u32 = 0; - for (text) |c| { - number = number * 10 + @intCast(u32, c - '0'); - } - lexer.number = @intToFloat(f64, number); - } else { - // Parse a double-precision floating-point number; - if (std.fmt.parseFloat(f64, text)) |num| { - lexer.number = num; - } else |err| { - lexer.addError(lexer.start, "Invalid number", .{}, true); - } + if (lexer.code_point == 'n' and !hasDotOrExponent) { + // The only bigint literal that can start with 0 is "0n" + if (text.len > 1 and first == '0') { + lexer.syntaxError(); } - } - // An underscore must not come last; - if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { - lexer.end -= 1; - lexer.syntaxError(); + // Store bigints as text to avoid precision loss; + lexer.identifier = text; + } else if (!hasDotOrExponent and lexer.end - lexer.start < 10) { + // Parse a 32-bit integer (very fast path); + var number: u32 = 0; + for (text) |c| { + number = number * 10 + @intCast(u32, c - '0'); + } + lexer.number = @intToFloat(f64, number); + } else { + // Parse a double-precision floating-point number; + if (std.fmt.parseFloat(f64, text)) |num| { + lexer.number = num; + } else |err| { + lexer.addError(lexer.start, "Invalid number", .{}, true); + } } + } - // Handle bigint literals after the underscore-at-end check above; - if (lexer.code_point == 'n' and !hasDotOrExponent) { - lexer.token = T.t_big_integer_literal; - lexer.step(); - } + // An underscore must not come last; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.end -= 1; + lexer.syntaxError(); + } - // Identifiers can't occur immediately after numbers; - if (isIdentifierStart(lexer.code_point)) { - lexer.syntaxError(); - } + // Handle bigint literals after the underscore-at-end check above; + if (lexer.code_point == 'n' and !hasDotOrExponent) { + lexer.token = T.t_big_integer_literal; + lexer.step(); } - }; -} -// JS/TS lexer -pub const Lexer = 
NewLexerType(null); -pub const JSONLexer = NewLexerType(JSONOptions{ .allow_comments = false, .allow_trailing_commas = false }); -pub const TSConfigJSONLexer = NewLexerType(JSONOptions{ .allow_comments = true, .allow_trailing_commas = true }); + // Identifiers can't occur immediately after numbers; + if (isIdentifierStart(lexer.code_point)) { + lexer.syntaxError(); + } + } +}; pub fn isIdentifierStart(codepoint: CodePoint) bool { switch (codepoint) { diff --git a/src/js_lexer_tables.zig b/src/js_lexer_tables.zig index 0053b7234..4b9977699 100644 --- a/src/js_lexer_tables.zig +++ b/src/js_lexer_tables.zig @@ -231,6 +231,13 @@ pub const PropertyModifierKeyword = enum { }); }; +pub const TypeScriptAccessibilityModifier = std.ComptimeStringMap(u1, .{ + .{ "public", 1 }, + .{ "private", 1 }, + .{ "protected", 1 }, + .{ "readonly", 1 }, +}); + pub const TokenEnumType = std.EnumArray(T, []u8); pub const tokenToString = comptime { @@ -532,6 +539,7 @@ pub var jsxEntity: JSXEntityMap = undefined; pub fn initJSXEntityMap() !void { jsxEntity = JSXEntityMap.init(alloc.dynamic); + // return jsxEntity; jsxEntity.ensureCapacity(255) catch unreachable; jsxEntity.putAssumeCapacity("quot", @as(CodePoint, 0x0022)); @@ -793,14 +801,14 @@ test "tokenToString" { expectString(tokenToString.get(T.t_end_of_file), "end of file"); } -test "jsxEntity" { - try alloc.setup(std.heap.page_allocator); +// test "jsxEntity" { +// try alloc.setup(std.heap.page_allocator); - initJSXEntityMap() catch |err| { - @panic(@errorName(err)); - }; +// initJSXEntityMap() catch |err| { +// @panic(@errorName(err)); +// }; - if (jsxEntity.get("sim")) |v| { - expect(v == 0x223C); - } -} +// if (jsxEntity.get("sim")) |v| { +// expect(v == 0x223C); +// } +// } diff --git a/src/js_parser.zig b/src/js_parser.zig index c36915e05..b0339e1aa 100644 --- a/src/js_parser.zig +++ b/src/js_parser.zig @@ -6,6 +6,9 @@ const js_ast = @import("js_ast.zig"); const options = @import("options.zig"); const alloc = @import("alloc.zig"); 
+const js_printer = @import("js_printer.zig"); +const renamer = @import("renamer.zig"); + const fs = @import("fs.zig"); usingnamespace @import("strings.zig"); usingnamespace @import("ast/base.zig"); @@ -41,6 +44,71 @@ const ExprOrLetStmt = struct { decls: []G.Decl = &([_]G.Decl{}), }; +const FunctionKind = enum { stmt, expr }; + +const EightLetterMatcher = strings.ExactSizeMatcher(8); + +const AsyncPrefixExpression = enum { + none, + is_yield, + is_async, + is_await, + + pub fn find(ident: string) AsyncPrefixExpression { + if (ident.len != 5) { + return .none; + } + + switch (EightLetterMatcher.match(ident)) { + EightLetterMatcher.case("yield") => { + return .is_yield; + }, + EightLetterMatcher.case("await") => { + return .is_await; + }, + EightLetterMatcher.case("async") => { + return .is_async; + }, + + else => { + return .none; + }, + } + } +}; + +fn statementCaresAboutScope(stmt: Stmt) bool { + switch (stmt.data) { + .s_block, + .s_empty, + .s_debugger, + .s_expr, + .s_if, + .s_for, + .s_for_in, + .s_for_of, + .s_do_while, + .s_while, + .s_with, + .s_try, + .s_switch, + .s_return, + .s_throw, + .s_break, + .s_continue, + .s_directive, + => { + return false; + }, + .s_local => |s| { + return s.kind != .k_var; + }, + else => { + return true; + }, + } +} + const ExprIn = struct { // This tells us if there are optional chain expressions (EDot, EIndex, or // ECall) that are chained on to this expression. Because of the way the AST @@ -205,13 +273,19 @@ const ParenExprOpts = struct { force_arrow_fn: bool = false, }; +const AwaitOrYield = enum { + allow_ident, + allow_expr, + forbid_all, +}; + // This is function-specific information used during parsing. It is saved and // restored on the call stack around code that parses nested functions and // arrow expressions. 
const FnOrArrowDataParse = struct { async_range: ?logger.Range = null, - allow_await: bool = false, - allow_yield: bool = false, + allow_await: AwaitOrYield = AwaitOrYield.allow_ident, + allow_yield: AwaitOrYield = AwaitOrYield.allow_ident, allow_super_call: bool = false, is_top_level: bool = false, is_constructor: bool = false, @@ -225,7 +299,7 @@ const FnOrArrowDataParse = struct { allow_ts_decorators: bool = false, pub fn i() FnOrArrowDataParse { - return FnOrArrowDataParse{ .allow_await = false }; + return FnOrArrowDataParse{ .allow_await = AwaitOrYield.forbid_all }; } }; @@ -352,7 +426,7 @@ pub const Parser = struct { pub const Options = struct { jsx: options.JSX, - ts: bool = true, + ts: bool = false, ascii_only: bool = true, keep_names: bool = true, mangle_syntax: bool = false, @@ -375,8 +449,10 @@ pub const Parser = struct { if (self.p) |p| { // Parse the file in the first pass, but do not bind symbols var opts = ParseStatementOptions{ .is_module_scope = true }; + debugl("<p.parseStmtsUpTo>"); const stmts = try p.parseStmtsUpTo(js_lexer.T.t_end_of_file, &opts); - try p.prepareForVisitPass(); + debugl("</p.parseStmtsUpTo>"); + // try p.prepareForVisitPass(); // ESM is always strict mode. I don't think we need this. 
// // Strip off a leading "use strict" directive when not bundling @@ -398,18 +474,21 @@ pub const Parser = struct { }, logger.Loc.Empty); } + debugl("<p.appendPart>"); var parts = try List(js_ast.Part).initCapacity(p.allocator, 1); - try p.appendPart(parts, stmts); + try p.appendPart(&parts, stmts); + debugl("</p.appendPart>"); // Pop the module scope to apply the "ContainsDirectEval" rules - p.popScope(); - + // p.popScope(); + debugl("<result.Ast>"); result.ast = js_ast.Ast{ .parts = parts.toOwnedSlice(), .symbols = p.symbols.toOwnedSlice(), - .module_scope = p.module_scope.*, + // .module_scope = p.module_scope.*, }; result.ok = true; + debugl("</result.Ast>"); // result = p.toAST(parts); // result.source_map_comment = p.lexer.source_mapping_url; @@ -422,9 +501,9 @@ pub const Parser = struct { const lexer = try js_lexer.Lexer.init(log, source, allocator); return Parser{ .options = Options{ - .ts = transform.ts, + .ts = transform.loader == .tsx or transform.loader == .ts, .jsx = options.JSX{ - .parse = true, + .parse = transform.loader == .tsx or transform.loader == .jsx, .factory = transform.jsx_factory, .fragment = transform.jsx_fragment, }, @@ -438,6 +517,8 @@ pub const Parser = struct { } }; +const FindLabelSymbolResult = struct { ref: Ref, is_loop: bool, found: bool = false }; + const FindSymbolResult = struct { ref: Ref, declare_loc: ?logger.Loc = null, @@ -472,7 +553,8 @@ const ParseStatementOptions = struct { }; // P is for Parser! 
-const P = struct { +// public only because of Binding.ToExpr +pub const P = struct { allocator: *std.mem.Allocator, options: Parser.Options, log: *logger.Log, @@ -491,7 +573,7 @@ const P = struct { allocated_names: List(string), latest_arrow_arg_loc: logger.Loc = logger.Loc.Empty, forbid_suffix_after_as_loc: logger.Loc = logger.Loc.Empty, - current_scope: *js_ast.Scope = null, + current_scope: *js_ast.Scope = undefined, scopes_for_current_part: List(*js_ast.Scope), symbols: List(js_ast.Symbol), ts_use_counts: List(u32), @@ -580,10 +662,10 @@ const P = struct { // The visit pass binds identifiers to declared symbols, does constant // folding, substitutes compile-time variable definitions, and lowers certain // syntactic constructs as appropriate. - stmt_expr_value: js_ast.E, - call_target: js_ast.E, - delete_target: js_ast.E, - loop_body: js_ast.S, + stmt_expr_value: Expr.Data, + call_target: Expr.Data, + delete_target: Expr.Data, + loop_body: Stmt.Data, module_scope: *js_ast.Scope = undefined, is_control_flow_dead: bool = false, @@ -647,6 +729,9 @@ const P = struct { // warnings about non-string import paths will be omitted inside try blocks. await_target: ?js_ast.E = null, + to_expr_wrapper_namespace: Binding2ExprWrapper.Namespace, + to_expr_wrapper_hoisted: Binding2ExprWrapper.Hoisted, + // This helps recognize the "import().catch()" pattern. We also try to avoid // warning about this just like the "try { await import() }" pattern. 
then_catch_chain: ThenCatchChain, @@ -680,6 +765,11 @@ const P = struct { // after_arrow_body_loc: logger.Loc = logger.Loc.Empty, + const Binding2ExprWrapper = struct { + pub const Namespace = Binding.ToExpr(P, P.wrapIdentifierNamespace); + pub const Hoisted = Binding.ToExpr(P, P.wrapIdentifierHoisting); + }; + pub fn s(p: *P, t: anytype, loc: logger.Loc) Stmt { if (@typeInfo(@TypeOf(t)) == .Pointer) { return Stmt.init(t, loc); @@ -932,6 +1022,10 @@ const P = struct { } p.hoistSymbols(p.module_scope); + + p.require_ref = try p.newSymbol(.unbound, "require"); + p.exports_ref = try p.newSymbol(.hoisted, "exports"); + p.module_ref = try p.newSymbol(.hoisted, "module"); } pub fn hoistSymbols(p: *P, scope: *js_ast.Scope) void { @@ -963,7 +1057,7 @@ const P = struct { // Sanity-check that the scopes generated by the first and second passes match if (!order.loc.eql(loc) or order.scope.kind != kind) { - std.debug.panic("Expected scope ({s}, {d}) in {s}, found scope ({s}, {d})", .{ kind, loc.start, p.source.path.pretty, order.scope.kind, order.loc.start }); + p.panic("Expected scope ({s}, {d}) in {s}, found scope ({s}, {d})", .{ kind, loc.start, p.source.path.pretty, order.scope.kind, order.loc.start }); } p.current_scope = order.scope; @@ -972,17 +1066,21 @@ const P = struct { } pub fn pushScopeForParsePass(p: *P, kind: js_ast.Scope.Kind, loc: logger.Loc) !usize { - var parent = p.current_scope; - var scope = try js_ast.Scope.initPtr(p.allocator); + debugl("<pushScopeForParsePass>"); + defer debugl("</pushScopeForParsePass>"); + var scope = try Scope.initPtr(p.allocator); scope.kind = kind; - scope.parent = parent; - scope.label_ref = null; - var i = parent.children.items.len; + var parent: *Scope = undefined; + + if (kind != .entry) { + parent = p.current_scope; + scope.parent = parent; + try parent.children.append(scope); + scope.strict_mode = parent.strict_mode; + } - try parent.children.append(scope); - scope.strict_mode = parent.strict_mode; p.current_scope = scope; 
// Enforce that scope locations are strictly increasing to help catch bugs @@ -990,7 +1088,7 @@ const P = struct { if (p.scopes_in_order.items.len > 0) { const prev_start = p.scopes_in_order.items[p.scopes_in_order.items.len - 1].loc.start; if (prev_start >= loc.start) { - std.debug.panic("Scope location {d} must be greater than {d}", .{ loc.start, prev_start }); + p.panic("Scope location {d} must be greater than {d}", .{ loc.start, prev_start }); } } @@ -999,7 +1097,7 @@ const P = struct { // arguments. if (kind == js_ast.Scope.Kind.function_body) { if (parent.kind != js_ast.Scope.Kind.function_args) { - std.debug.panic("Internal error", .{}); + p.panic("Internal error", .{}); } var iter = scope.parent.?.members.iterator(); @@ -1013,7 +1111,11 @@ const P = struct { } } - return i; + // Remember the length in case we call popAndDiscardScope() later + const scope_index = p.scopes_in_order.items.len; + try p.scopes_in_order.append(ScopeOrder{ .loc = loc, .scope = scope }); + + return scope_index; } // Note: do not write to "p.log" in this function. 
Any errors due to conversion @@ -1086,8 +1188,8 @@ const P = struct { .is_computed = item.flags.is_computed, }, - .key = item.key orelse std.debug.panic("Internal error: Expected {s} to have a key.", .{item}), - .value = tup.binding orelse std.debug.panic("Internal error: Expected {s} to have a binding.", .{tup}), + .key = item.key orelse p.panic("Internal error: Expected {s} to have a key.", .{item}), + .value = tup.binding orelse p.panic("Internal error: Expected {s} to have a binding.", .{tup}), .default_value = initializer, }) catch unreachable; } @@ -1209,8 +1311,8 @@ const P = struct { var scopeIndex = try p.pushScopeForParsePass(js_ast.Scope.Kind.function_args, p.lexer.loc()); var func = p.parseFn(name, FnOrArrowDataParse{ .async_range = asyncRange, - .allow_await = isAsync, - .allow_yield = isGenerator, + .allow_await = if (isAsync) AwaitOrYield.allow_expr else AwaitOrYield.allow_ident, + .allow_yield = if (isGenerator) AwaitOrYield.allow_expr else AwaitOrYield.allow_ident, .is_typescript_declare = opts.is_typescript_declare, // Only allow omitting the body if we're parsing TypeScript @@ -1220,8 +1322,38 @@ const P = struct { // Don't output anything if it's just a forward declaration of a function if (opts.is_typescript_declare or func.body == null) { p.popAndDiscardScope(scopeIndex); + + // Balance the fake block scope introduced above + if (hasIfScope) { + p.popScope(); + } + + if (opts.is_typescript_declare and opts.is_namespace_scope and opts.is_export) { + p.has_non_local_export_declare_inside_namespace = true; + } + + return p.s(S.TypeScript{}, loc); } + // Balance the fake block scope introduced above + if (hasIfScope) { + p.popScope(); + } + + // Only declare the function after we know if it had a body or not. 
Otherwise + // TypeScript code such as this will double-declare the symbol: + // + // function foo(): void; + // function foo(): void {} + // + if (name) |*name_| { + const kind = if (isGenerator or isAsync) Symbol.Kind.generator_or_async_function else Symbol.Kind.hoisted_function; + name_.ref = try p.declareSymbol(kind, name_.loc, nameText); + } + func.name = name; + + func.flags.has_if_scope = hasIfScope; + func.flags.is_export = opts.is_export; return p.s(S.Function{ @@ -1243,7 +1375,7 @@ const P = struct { // Remove the last child from the parent scope var last = children.items.len - 1; if (children.items[last] != to_discard) { - std.debug.panic("Internal error", .{}); + p.panic("Internal error", .{}); } _ = children.popOrNull(); @@ -1258,8 +1390,8 @@ const P = struct { .name = name, .flags = Flags.Function{ .has_rest_arg = false, - .is_async = opts.allow_await, - .is_generator = opts.allow_yield, + .is_async = opts.allow_await == .allow_expr, + .is_generator = opts.allow_yield == .allow_expr, }, .arguments_ref = null, @@ -1269,12 +1401,12 @@ const P = struct { // Await and yield are not allowed in function arguments var old_fn_or_arrow_data = opts; - p.fn_or_arrow_data_parse.allow_await = false; - p.fn_or_arrow_data_parse.allow_yield = false; + p.fn_or_arrow_data_parse.allow_await = if (opts.allow_await == .allow_expr) AwaitOrYield.forbid_all else AwaitOrYield.allow_ident; + p.fn_or_arrow_data_parse.allow_yield = if (opts.allow_yield == .allow_expr) AwaitOrYield.forbid_all else AwaitOrYield.allow_ident; // If "super()" is allowed in the body, it's allowed in the arguments p.fn_or_arrow_data_parse.allow_super_call = opts.allow_super_call; - + var args = List(G.Arg).init(p.allocator); while (p.lexer.token != T.t_close_paren) { // Skip over "this" type annotations if (p.options.ts and p.lexer.token == T.t_this) { @@ -1290,23 +1422,126 @@ const P = struct { p.lexer.next(); continue; } + + var ts_decorators: []ExprNodeIndex = undefined; + if 
(opts.allow_ts_decorators) { + ts_decorators = p.parseTypeScriptDecorators(); + } + + if (!func.flags.has_rest_arg and p.lexer.token == T.t_dot_dot_dot) { + // p.markSyntaxFeature + p.lexer.next(); + func.flags.has_rest_arg = true; + } + + var is_typescript_ctor_field = false; + var is_identifier = p.lexer.token == T.t_identifier; + var text = p.lexer.identifier; + var arg = p.parseBinding(); + + if (p.options.ts and is_identifier and opts.is_constructor) { + // Skip over TypeScript accessibility modifiers, which turn this argument + // into a class field when used inside a class constructor. This is known + // as a "parameter property" in TypeScript. + while (true) { + switch (p.lexer.token) { + .t_identifier, .t_open_brace, .t_open_bracket => { + if (!js_lexer.TypeScriptAccessibilityModifier.has(p.lexer.identifier)) { + break; + } + + is_typescript_ctor_field = true; + + // TypeScript requires an identifier binding + if (p.lexer.token != .t_identifier) { + p.lexer.expect(.t_identifier); + } + text = p.lexer.identifier; + + // Re-parse the binding (the current binding is the TypeScript keyword) + arg = p.parseBinding(); + }, + else => { + break; + }, + } + } + + // "function foo(a?) 
{}" + if (p.lexer.token == .t_question) { + p.lexer.next(); + } + + // "function foo(a: any) {}" + if (p.lexer.token == .t_colon) { + p.lexer.next(); + p.skipTypescriptType(.lowest); + } + } + + var parseStmtOpts = ParseStatementOptions{}; + p.declareBinding(.hoisted, arg, &parseStmtOpts) catch unreachable; + + var default_value: Expr = undefined; + if (!func.flags.has_rest_arg and p.lexer.token == .t_equals) { + // p.markSyntaxFeature + p.lexer.next(); + default_value = p.parseExpr(.comma); + } + + args.append(G.Arg{ + .ts_decorators = ts_decorators, + .binding = arg, + .default = default_value, + + // We need to track this because it affects code generation + .is_typescript_ctor_field = is_typescript_ctor_field, + }) catch unreachable; + + if (p.lexer.token != .t_comma) { + break; + } + + if (func.flags.has_rest_arg) { + // JavaScript does not allow a comma after a rest argument + if (opts.is_typescript_declare) { + // TypeScript does allow a comma after a rest argument in a "declare" context + p.lexer.next(); + } else { + p.lexer.expect(.t_close_paren); + } + + break; + } + + p.lexer.next(); } - var ts_decorators: []ExprNodeIndex = undefined; - if (opts.allow_ts_decorators) { - ts_decorators = p.parseTypeScriptDecorators(); + // Reserve the special name "arguments" in this scope. This ensures that it + // shadows any variable called "arguments" in any parent scopes. But only do + // this if it wasn't already declared above because arguments are allowed to + // be called "arguments", in which case the real "arguments" is inaccessible. 
+ if (!p.current_scope.members.contains("arguments")) { + func.arguments_ref = p.declareSymbol(.arguments, func.open_parens_loc, "arguments") catch unreachable; + p.symbols.items[func.arguments_ref.?.inner_index].must_not_be_renamed = true; } - if (!func.flags.has_rest_arg and p.lexer.token == T.t_dot_dot_dot) { - // p.markSyntaxFeature + p.lexer.expect(.t_close_paren); + p.fn_or_arrow_data_parse = old_fn_or_arrow_data; + + // "function foo(): any {}" + if (p.options.ts and p.lexer.token == .t_colon) { p.lexer.next(); - func.flags.has_rest_arg = true; + p.skipTypescriptReturnType(); } - var is_typescript_ctor_field = false; - var is_identifier = p.lexer.token == T.t_identifier; - // TODO: parseFn - // var arg = p.parseBinding(); + // "function foo(): any;" + if (opts.allow_missing_body_for_type_script and p.lexer.token != .t_open_brace) { + p.lexer.expectOrInsertSemicolon(); + return func; + } + var tempOpts = opts; + func.body = p.parseFnBody(&tempOpts) catch unreachable; return func; } @@ -1314,6 +1549,11 @@ const P = struct { // pub fn parseBinding(p: *P) // TODO: + pub fn skipTypescriptReturnType(p: *P) void { + notimpl(); + } + + // TODO: pub fn parseTypeScriptDecorators(p: *P) []ExprNodeIndex { notimpl(); } @@ -1431,12 +1671,12 @@ const P = struct { var loc = p.lexer.loc(); switch (p.lexer.token) { - js_lexer.T.t_semicolon => { + .t_semicolon => { p.lexer.next(); return Stmt.empty(); }, - js_lexer.T.t_export => { + .t_export => { var previousExportKeyword = p.es6_export_keyword; if (opts.is_module_scope) { p.es6_export_keyword = p.lexer.range(); @@ -1607,7 +1847,7 @@ const P = struct { } else {} }, else => { - std.debug.panic("Internal error: unexpected stmt {s}", .{stmt}); + p.panic("Internal error: unexpected stmt {s}", .{stmt}); }, } @@ -1657,7 +1897,7 @@ const P = struct { return p.s(S.ExportDefault{ .default_name = default_name, .value = js_ast.StmtOrExpr{ .stmt = stmt } }, loc); }, else => { - std.debug.panic("internal error: unexpected", .{}); + 
p.panic("internal error: unexpected", .{}); }, } } @@ -2028,16 +2268,16 @@ const P = struct { var isForAwait = p.lexer.isContextualKeyword("await"); if (isForAwait) { const await_range = p.lexer.range(); - if (!p.fn_or_arrow_data_parse.allow_await) { + if (p.fn_or_arrow_data_parse.allow_await != .allow_expr) { try p.log.addRangeError(p.source, await_range, "Cannot use \"await\" outside an async function"); isForAwait = false; } else { // TODO: improve error handling here // didGenerateError := p.markSyntaxFeature(compat.ForAwait, awaitRange) - // if p.fnOrArrowDataParse.isTopLevel && !didGenerateError { - // p.topLevelAwaitKeyword = awaitRange - // p.markSyntaxFeature(compat.TopLevelAwait, awaitRange) - // } + if (p.fn_or_arrow_data_parse.is_top_level) { + p.top_level_await_keyword = await_range; + // p.markSyntaxFeature(compat.TopLevelAwait, awaitRange) + } } p.lexer.next(); } @@ -2768,18 +3008,18 @@ const P = struct { pub fn requireInitializers(p: *P, decls: []G.Decl) !void { for (decls) |decl| { - if (decl.value) |val| { + if (decl.value == null) { switch (decl.binding.data) { .b_identifier => |ident| { const r = js_lexer.rangeOfIdentifier(&p.source, decl.binding.loc); try p.log.addRangeErrorFmt(p.source, r, p.allocator, "The constant \"{s}\" must be initialized", .{p.symbols.items[ident.ref.inner_index].original_name}); - return; + // return;/ + }, + else => { + try p.log.addError(p.source, decl.binding.loc, "This constant must be initialized"); }, - else => {}, } } - - try p.log.addError(p.source, decl.binding.loc, "This constant must be initialized"); } } @@ -2789,7 +3029,7 @@ const P = struct { switch (p.lexer.token) { .t_identifier => { const name = p.lexer.identifier; - if ((p.fn_or_arrow_data_parse.allow_await and strings.eql(name, "await")) or (p.fn_or_arrow_data_parse.allow_yield and strings.eql(name, "yield"))) { + if ((p.fn_or_arrow_data_parse.allow_await != .allow_ident and strings.eql(name, "await")) or (p.fn_or_arrow_data_parse.allow_yield != 
.allow_ident and strings.eql(name, "yield"))) { // TODO: add fmt to addRangeError p.log.addRangeError(p.source, p.lexer.range(), "Cannot use \"yield\" or \"await\" here.") catch unreachable; } @@ -3183,7 +3423,7 @@ const P = struct { }, p.lexer.loc())); } - if (p.lexer.token == .t_end_of_file) { + if (p.lexer.token == eend) { break :run; } @@ -3386,6 +3626,26 @@ const P = struct { return ref; } + pub fn validateFunctionName(p: *P, func: G.Fn, kind: FunctionKind) void { + if (func.name) |name| { + const original_name = p.symbols.items[name.ref.?.inner_index].original_name; + + if (func.flags.is_async and strings.eql(original_name, "await")) { + p.log.addRangeError( + p.source, + js_lexer.rangeOfIdentifier(&p.source, name.loc), + "An async function cannot be named \"await\"", + ) catch unreachable; + } else if (kind == .expr and func.flags.is_generator and strings.eql(original_name, "yield")) { + p.log.addRangeError( + p.source, + js_lexer.rangeOfIdentifier(&p.source, name.loc), + "An generator function expression cannot be named \"yield\"", + ) catch unreachable; + } + } + } + pub fn parseFnExpr(p: *P, loc: logger.Loc, is_async: bool, async_range: logger.Range) !Expr { p.lexer.next(); const is_generator = p.lexer.token == T.t_asterisk; @@ -3412,6 +3672,7 @@ const P = struct { } else { (name orelse unreachable).ref = try p.newSymbol(.hoisted_function, p.lexer.identifier); } + debug("FUNC NAME {s}", .{p.lexer.identifier}); p.lexer.next(); } @@ -3421,10 +3682,12 @@ const P = struct { var func = p.parseFn(name, FnOrArrowDataParse{ .async_range = async_range, - .allow_await = is_async, - .allow_yield = is_generator, + .allow_await = if (is_async) .allow_expr else .allow_ident, + .allow_yield = if (is_generator) .allow_expr else .allow_ident, }); + p.validateFunctionName(func, .expr); + return p.e(js_ast.E.Function{ .func = func, }, loc); @@ -3549,74 +3812,23 @@ const P = struct { return self.mm(@TypeOf(kind), kind); } - // The name is temporarily stored in the ref until 
the scope traversal pass - // happens, at which point a symbol will be generated and the ref will point - // to the symbol instead. - // - // The scope traversal pass will reconstruct the name using one of two methods. - // In the common case, the name is a slice of the file itself. In that case we - // can just store the slice and not need to allocate any extra memory. In the - // rare case, the name is an externally-allocated string. In that case we store - // an index to the string and use that index during the scope traversal pass. + // Doing this the fast way is too complicated for now. pub fn storeNameInRef(p: *P, name: string) !js_ast.Ref { - // jarred: honestly, this is kind of magic to me - // but I think I think I understand it. - // the strings are slices. - // "name" is just a different place in p.source.contents's buffer - // Instead of copying a shit ton of strings everywhere - // we can just say "yeah this is really over here at inner_index" - // .source_index being null is used to identify was this allocated or is just in the orignial thing. - // you could never do this in JavaScript!! - const ptr0 = @ptrToInt(name.ptr); - const ptr1 = @ptrToInt(p.source.contents.ptr); - - // Is the data in "name" a subset of the data in "p.source.Contents"? - if (ptr0 >= ptr1 and ptr0 + name.len < p.source.contents.len) { - // std.debug.print("storeNameInRef fast path", .{}); - // The name is a slice of the file contents, so we can just reference it by - // length and don't have to allocate anything. This is the common case. - // - // It's stored as a negative value so we'll crash if we try to use it. That - // way we'll catch cases where we've forgotten to call loadNameFromRef(). - // The length is the negative part because we know it's non-zero. 
- return js_ast.Ref{ .source_index = @intCast(Ref.Int, ptr0), .inner_index = (@intCast(Ref.Int, name.len) + @intCast(Ref.Int, ptr0)) }; + // allocated_names is lazily allocated + if (p.allocated_names.capacity > 0) { + const inner_index = @intCast(Ref.Int, p.allocated_names.items.len); + try p.allocated_names.append(name); + return js_ast.Ref{ .source_index = std.math.maxInt(Ref.Int), .inner_index = inner_index }; } else { - // std.debug.print("storeNameInRef slow path", .{}); - // The name is some memory allocated elsewhere. This is either an inline - // string constant in the parser or an identifier with escape sequences - // in the source code, which is very unusual. Stash it away for later. - // This uses allocations but it should hopefully be very uncommon. - - // allocated_names is lazily allocated - if (p.allocated_names.capacity > 0) { - const inner_index = @intCast(Ref.Int, p.allocated_names.items.len); - try p.allocated_names.append(name); - return js_ast.Ref{ .source_index = std.math.maxInt(Ref.Int), .inner_index = inner_index }; - } else { - p.allocated_names = try @TypeOf(p.allocated_names).initCapacity(p.allocator, 1); - p.allocated_names.appendAssumeCapacity(name); - return js_ast.Ref{ .source_index = std.math.maxInt(Ref.Int), .inner_index = 0 }; - } - - // p.allocatedNames = append(p.allocatedNames, name) - // return ref + p.allocated_names = try @TypeOf(p.allocated_names).initCapacity(p.allocator, 1); + p.allocated_names.appendAssumeCapacity(name); + return js_ast.Ref{ .source_index = std.math.maxInt(Ref.Int), .inner_index = 0 }; } } pub fn loadNameFromRef(p: *P, ref: js_ast.Ref) string { - if (!ref.isSourceNull()) { - if (ref.source_index == 0x80000000) { - return p.allocated_names.items[ref.inner_index]; - } - - if (std.builtin.mode != std.builtin.Mode.ReleaseFast) { - assert(ref.inner_index - ref.source_index > 0); - } - - return p.source.contents[ref.inner_index .. 
ref.inner_index - ref.source_index]; - } else { - std.debug.panic("Internal error: invalid symbol reference. {s}", .{ref}); - } + assert(ref.inner_index < p.allocated_names.items.len); + return p.allocated_names.items[ref.inner_index]; } // This parses an expression. This assumes we've already parsed the "async" @@ -3634,34 +3846,38 @@ const P = struct { switch (p.lexer.token) { // "async => {}" .t_equals_greater_than => { - const arg = G.Arg{ .binding = p.b( - B.Identifier{ - .ref = try p.storeNameInRef("async"), - }, - async_range.loc, - ) }; - _ = p.pushScopeForParsePass(.function_args, async_range.loc) catch unreachable; - defer p.popScope(); - var arrow_body = try p.parseArrowBodySingleArg(arg, FnOrArrowDataParse{}); - return p.e(arrow_body, async_range.loc); + if (level.lte(.assign)) { + const arg = G.Arg{ .binding = p.b( + B.Identifier{ + .ref = try p.storeNameInRef("async"), + }, + async_range.loc, + ) }; + _ = p.pushScopeForParsePass(.function_args, async_range.loc) catch unreachable; + defer p.popScope(); + var arrow_body = try p.parseArrowBodySingleArg(arg, FnOrArrowDataParse{}); + return p.e(arrow_body, async_range.loc); + } }, // "async x => {}" .t_identifier => { - // p.markLoweredSyntaxFeature(); - const ref = try p.storeNameInRef(p.lexer.identifier); - var arg = G.Arg{ .binding = p.b(B.Identifier{ - .ref = ref, - }, p.lexer.loc()) }; - p.lexer.next(); + if (level.lte(.assign)) { + // p.markLoweredSyntaxFeature(); + const ref = try p.storeNameInRef(p.lexer.identifier); + var arg = G.Arg{ .binding = p.b(B.Identifier{ + .ref = ref, + }, p.lexer.loc()) }; + p.lexer.next(); - _ = try p.pushScopeForParsePass(.function_args, async_range.loc); - defer p.popScope(); + _ = try p.pushScopeForParsePass(.function_args, async_range.loc); + defer p.popScope(); - var arrowBody = try p.parseArrowBodySingleArg(arg, FnOrArrowDataParse{ - .allow_await = true, - }); - arrowBody.is_async = true; - return p.e(arrowBody, async_range.loc); + var arrowBody = try 
p.parseArrowBodySingleArg(arg, FnOrArrowDataParse{ + .allow_await = .allow_expr, + }); + arrowBody.is_async = true; + return p.e(arrowBody, async_range.loc); + } }, // "async()" @@ -3800,7 +4016,7 @@ const P = struct { } } - p.current_scope = current_scope.parent orelse std.debug.panic("Internal error: attempted to call popScope() on the topmost scope", .{}); + p.current_scope = current_scope.parent orelse p.panic("Internal error: attempted to call popScope() on the topmost scope", .{}); } pub fn markExprAsParenthesized(p: *P, expr: *Expr) void { @@ -3985,7 +4201,7 @@ const P = struct { // Parse a shorthand property if (!opts.is_class and kind == .normal and p.lexer.token != .t_colon and p.lexer.token != .t_open_paren and p.lexer.token != .t_less_than and !opts.is_generator and !js_lexer.Keywords.has(name)) { - if ((p.fn_or_arrow_data_parse.allow_await and strings.eql(name, "await")) or (p.fn_or_arrow_data_parse.allow_yield and strings.eql(name, "yield"))) { + if ((p.fn_or_arrow_data_parse.allow_await != .allow_ident and strings.eql(name, "await")) or (p.fn_or_arrow_data_parse.allow_yield != .allow_ident and strings.eql(name, "yield"))) { // TODO: add fmt to addRangeError p.log.addRangeError(p.source, name_range, "Cannot use \"yield\" or \"await\" here.") catch unreachable; } @@ -4121,8 +4337,8 @@ const P = struct { var func = p.parseFn(null, FnOrArrowDataParse{ .async_range = opts.async_range, - .allow_await = opts.is_async, - .allow_yield = opts.is_generator, + .allow_await = if (opts.is_async) AwaitOrYield.allow_expr else AwaitOrYield.allow_ident, + .allow_yield = if (opts.is_generator) AwaitOrYield.allow_expr else AwaitOrYield.allow_ident, .allow_super_call = opts.class_has_extends and is_constructor, .allow_ts_decorators = opts.allow_ts_decorators, .is_constructor = is_constructor, @@ -4299,7 +4515,7 @@ const P = struct { // Forbid decorators on class constructors if (opts.ts_decorators.len > 0) { - switch ((property.key orelse std.debug.panic("Internal error: 
Expected property {s} to have a key.", .{property})).data) { + switch ((property.key orelse p.panic("Internal error: Expected property {s} to have a key.", .{property})).data) { .e_string => |str| { if (strings.eqlUtf16("constructor", str.value)) { p.log.addError(p.source, first_decorator_loc, "TypeScript does not allow decorators on class constructors") catch unreachable; @@ -5183,6 +5399,17 @@ const P = struct { } } } + + pub fn panic(p: *P, comptime str: string, args: anytype) noreturn { + p.log.addRangeErrorFmt(p.source, p.lexer.range(), p.allocator, str, args) catch unreachable; + + var fixedBuffer = [_]u8{0} ** 4096; + var stream = std.io.fixedBufferStream(&fixedBuffer); + + p.log.print(stream.writer()) catch unreachable; + std.debug.panic("{s}", .{fixedBuffer}); + } + pub fn _parsePrefix(p: *P, level: Level, errors: *DeferredErrors, flags: Expr.EFlags) Expr { const loc = p.lexer.loc(); const l = @enumToInt(level); @@ -5247,81 +5474,101 @@ const P = struct { p.lexer.next(); // Handle async and await expressions - if (name.len == 5) { - if (strings.eql(name, "async")) { - if (strings.eql(raw, "async")) { + switch (AsyncPrefixExpression.find(name)) { + .is_async => { + if (AsyncPrefixExpression.find(raw) != .is_async) { return p.parseAsyncPrefixExpr(name_range, level) catch unreachable; } - } else if (strings.eql(name, "await")) { - if (p.fn_or_arrow_data_parse.allow_await) { - if (!strings.eql(raw, "await")) { - p.log.addRangeError(p.source, name_range, "The keyword \"await\" cannot be escaped.") catch unreachable; - } else { - if (p.fn_or_arrow_data_parse.is_top_level) { - p.top_level_await_keyword = name_range; - // p.markSyntaxFeature() - } + }, - if (p.fn_or_arrow_data_parse.arrow_arg_errors) |*err| { - err.invalid_expr_await = name_range; + .is_await => { + switch (p.fn_or_arrow_data_parse.allow_await) { + .forbid_all => { + p.log.addRangeError(p.source, name_range, "The keyword \"await\" cannot be used here.") catch unreachable; + }, + .allow_expr => { + 
if (AsyncPrefixExpression.find(raw) != .is_await) { + p.log.addRangeError(p.source, name_range, "The keyword \"await\" cannot be escaped.") catch unreachable; } else { - p.fn_or_arrow_data_parse.arrow_arg_errors = DeferredArrowArgErrors{ .invalid_expr_await = name_range }; - } + if (p.fn_or_arrow_data_parse.is_top_level) { + p.top_level_await_keyword = name_range; + } - var value = p.parseExpr(.prefix); - if (p.lexer.token == T.t_asterisk_asterisk) { - p.lexer.unexpected(); - } + if (p.fn_or_arrow_data_parse.arrow_arg_errors) |*args| { + args.invalid_expr_await = name_range; + } - return p.e(E.Await{ .value = value }, loc); - } - } - } else if (strings.eql(name, "yield")) { - if (p.fn_or_arrow_data_parse.allow_yield) { - if (strings.eql(raw, "yield")) { - p.log.addRangeError(p.source, name_range, "The keyword \"yield\" cannot be escaped") catch unreachable; - } else { - if (l > @enumToInt(Level.assign)) { - p.log.addRangeError(p.source, name_range, "Cannot use a \"yield\" here without parentheses") catch unreachable; - } + const value = p.parseExpr(.prefix); + if (p.lexer.token == T.t_asterisk_asterisk) { + p.lexer.unexpected(); + } - if (p.fn_or_arrow_data_parse.arrow_arg_errors) |*err| { - err.invalid_expr_yield = name_range; + return p.e(E.Await{ .value = value }, loc); } + }, + .allow_ident => {}, + } + }, - return p.parseYieldExpr(loc); - } - } else if (!p.lexer.has_newline_before) { - // Try to gracefully recover if "yield" is used in the wrong place + .is_yield => { + switch (p.fn_or_arrow_data_parse.allow_yield) { + .forbid_all => { + p.log.addRangeError(p.source, name_range, "The keyword \"yield\" cannot be used here") catch unreachable; + }, + .allow_expr => { + if (AsyncPrefixExpression.find(raw) != .is_yield) { + p.log.addRangeError(p.source, name_range, "The keyword \"yield\" cannot be escaped") catch unreachable; + } else { + if (level.gte(.assign)) { + p.log.addRangeError(p.source, name_range, "Cannot use a \"yield\" here without parentheses") catch 
unreachable; + } + const value = p.parseExpr(.prefix); - switch (p.lexer.token) { - .t_null, .t_identifier, .t_false, .t_true, .t_numeric_literal, .t_big_integer_literal, .t_string_literal => { - p.log.addRangeError(p.source, name_range, "Cannot use \"yield\" outside a generator function") catch unreachable; - }, - else => {}, - } - } - } + if (p.fn_or_arrow_data_parse.arrow_arg_errors) |*args| { + args.invalid_expr_yield = name_range; + } - // Handle the start of an arrow expression - if (p.lexer.token == .t_equals_greater_than) { - const ref = p.storeNameInRef(name) catch unreachable; - var args = p.allocator.alloc(Arg, 1) catch unreachable; - args[0] = Arg{ .binding = p.b(B.Identifier{ - .ref = ref, - }, loc) }; + if (p.lexer.token == T.t_asterisk_asterisk) { + p.lexer.unexpected(); + } - _ = p.pushScopeForParsePass(.function_args, loc) catch unreachable; - defer p.popScope(); - return p.e(p.parseArrowBody(args, p.m(FnOrArrowDataParse{})) catch unreachable, loc); - } + return p.e(E.Yield{ .value = value }, loc); + } + }, + .allow_ident => { + // Try to gracefully recover if "yield" is used in the wrong place + if (!p.lexer.has_newline_before) { + switch (p.lexer.token) { + .t_null, .t_identifier, .t_false, .t_true, .t_numeric_literal, .t_big_integer_literal, .t_string_literal => { + p.log.addRangeError(p.source, name_range, "Cannot use \"yield\" outside a generator function") catch unreachable; + }, + else => {}, + } + } + }, + } + }, + .none => {}, + } + // Handle the start of an arrow expression + if (p.lexer.token == .t_equals_greater_than) { const ref = p.storeNameInRef(name) catch unreachable; - - return p.e(E.Identifier{ + var args = p.allocator.alloc(Arg, 1) catch unreachable; + args[0] = Arg{ .binding = p.b(B.Identifier{ .ref = ref, - }, loc); + }, loc) }; + + _ = p.pushScopeForParsePass(.function_args, loc) catch unreachable; + defer p.popScope(); + return p.e(p.parseArrowBody(args, p.m(FnOrArrowDataParse{})) catch unreachable, loc); } + + const ref = 
p.storeNameInRef(name) catch unreachable; + + return p.e(E.Identifier{ + .ref = ref, + }, loc); }, .t_string_literal, .t_no_substitution_template_literal => { return p.parseStringLiteral(); @@ -5784,7 +6031,7 @@ const P = struct { return p._parsePrefix(level, errors orelse &DeferredErrors.None, flags); } - pub fn appendPart(p: *P, parts: List(js_ast.Part), stmts: []Stmt) !void { + pub fn appendPart(p: *P, parts: *List(js_ast.Part), stmts: []Stmt) !void { p.symbol_uses = SymbolUseMap.init(p.allocator); p.declared_symbols.deinit(); p.import_records_for_current_part.deinit(); @@ -5803,7 +6050,10 @@ const P = struct { // const link = p.symbols.items[local.ref.inner_index].link; // } } - // TODO: here + // // TODO: here + try parts.append(js_ast.Part{ + .stmts = stmts, + }); } pub fn visitStmtsAndPrependTempRefs(p: *P, stmts: *List(Stmt), opts: *PrependTempRefsOpts) !void { @@ -5821,7 +6071,7 @@ const P = struct { if (p.fn_only_data_visit.this_capture_ref) |ref| { try p.temp_refs_to_declare.append(TempRef{ .ref = ref, - .value = p.e(E.This{}, opts.fn_body_loc orelse std.debug.panic("Internal error: Expected opts.fn_body_loc to exist", .{})), + .value = p.e(E.This{}, opts.fn_body_loc orelse p.panic("Internal error: Expected opts.fn_body_loc to exist", .{})), }); } } @@ -5838,6 +6088,14 @@ const P = struct { return p.visitExprInOut(expr, ExprIn{}); } + pub fn visitFunc(p: *P, func: *G.Fn, open_parens_loc: logger.Loc) void {} + + pub fn maybeKeepExprSymbolName(p: *P, expr: Expr, original_name: string, was_anonymous_named_expr: bool) Expr { + notimpl(); + + // return expr; + } + pub fn valueForThis(p: *P, loc: logger.Loc) ?Expr { // Substitute "this" if we're inside a static class property initializer if (p.fn_only_data_visit.this_class_static_ref) |*ref| { @@ -5885,6 +6143,7 @@ const P = struct { // return js_ast.Expr{Loc: expr.Loc, Data: &js_ast.EIdentifier{Ref: p.captureThis()}}, exprOut{} // } }, + .e_import_meta => |exp| {}, .e_spread => |exp| { return 
p.visitExpr(exp.value); @@ -5892,7 +6151,9 @@ const P = struct { .e_identifier => |e_| {}, .e_private_identifier => |e_| {}, .e_jsx_element => |e_| {}, + .e_template => |e_| {}, + .e_binary => |e_| {}, .e_index => |e_| {}, .e_unary => |e_| {}, @@ -6015,61 +6276,531 @@ const P = struct { switch (data.value) { .expr => |*expr| { const was_anonymous_named_expr = expr.isAnonymousNamed(); - // data.value.expr = p.m(p.visitExpr(expr.*)); + data.value.expr = p.visitExpr(expr.*); // // Optionally preserve the name - // data.value.expr = p.maybeKeepExprSymbolName(expr, "default", was_anonymous_named_expr); - - // // Discard type-only export default statements - // if (p.options.ts) { - // switch (expr.data) { - // .e_identifier => |ident| { - // const symbol = p.symbols.items[ident.ref.inner_index]; - // if (symbol.kind == .unbound) { - // if (p.local_type_names.get(symbol.original_name)) |local_type| { - // if (local_type.value) { - // return; - // } - // } - // } - // }, - // else => {}, - // } - // } + data.value.expr = p.maybeKeepExprSymbolName(expr.*, "default", was_anonymous_named_expr); + + // Discard type-only export default statements + if (p.options.ts) { + switch (expr.data) { + .e_identifier => |ident| { + const symbol = p.symbols.items[ident.ref.inner_index]; + if (symbol.kind == .unbound) { + if (p.local_type_names.get(symbol.original_name)) |local_type| { + if (local_type) { + return; + } + } + } + }, + else => {}, + } + } }, - .stmt => |st| {}, - } - }, - .s_export_equals => |data| {}, - .s_break => |data| {}, - .s_continue => |data| {}, - .s_label => |data| {}, - .s_local => |data| {}, - .s_expr => |data| {}, - .s_throw => |data| {}, - .s_return => |data| {}, - .s_block => |data| {}, - .s_with => |data| {}, - .s_while => |data| {}, - .s_do_while => |data| {}, - .s_if => |data| {}, - .s_for => |data| {}, - .s_for_in => |data| {}, - .s_for_of => |data| {}, - .s_try => |data| {}, - .s_switch => |data| {}, - .s_function => |data| {}, - .s_class => |data| {}, - 
.s_enum => |data| {}, - .s_namespace => |data| {}, - else => {}, + .stmt => |s2| { + switch (s2.data) { + .s_function => |func| { + var name: string = undefined; + if (func.func.name) |func_loc| { + name = p.symbols.items[func_loc.ref.?.inner_index].original_name; + } else { + func.func.name = data.default_name; + name = "default"; + } + + p.visitFunc(&func.func, func.func.open_parens_loc); + stmts.append(stmt.*) catch unreachable; + + if (func.func.name) |name_ref| { + // TODO-REACT-REFRESH-SPOT + stmts.append(p.keepStmtSymbolName(name_ref.loc, name_ref.ref.?, name)) catch unreachable; + } + }, + .s_class => |class| { + var shadow_ref = p.visitClass(s2.loc, &class.class); + }, + else => {}, + } + }, + } + }, + .s_export_equals => |data| { + // "module.exports = value" + stmts.append( + Expr.assignStmt( + p.e( + E.Dot{ + .target = p.e( + E.Identifier{ + .ref = p.module_ref, + }, + stmt.loc, + ), + .name = "exports", + .name_loc = stmt.loc, + }, + stmt.loc, + ), + p.visitExpr(data.value), + p.allocator, + ), + ) catch unreachable; + p.recordUsage(&p.module_ref); + }, + .s_break => |data| { + if (data.label) |*label| { + const name = p.loadNameFromRef(label.ref orelse p.panic("Expected label to have a ref", .{})); + const res = p.findLabelSymbol(label.loc, name); + + label.ref = res.ref; + } else if (p.fn_or_arrow_data_visit.is_inside_loop and !p.fn_or_arrow_data_visit.is_inside_switch) { + const r = js_lexer.rangeOfIdentifier(&p.source, stmt.loc); + p.log.addRangeError(p.source, r, "Cannot use \"break\" here") catch unreachable; + } + }, + .s_continue => |data| { + if (data.label) |*label| { + const name = p.loadNameFromRef(label.ref orelse p.panic("Expected continue label to have a ref", .{})); + const res = p.findLabelSymbol(label.loc, name); + label.ref = res.ref; + if (res.found and !res.is_loop) { + const r = js_lexer.rangeOfIdentifier(&p.source, stmt.loc); + p.log.addRangeErrorFmt(p.source, r, p.allocator, "Cannot \"continue\" to label {s}", .{name}) catch 
unreachable; + } + } else if (!p.fn_or_arrow_data_visit.is_inside_loop) { + const r = js_lexer.rangeOfIdentifier(&p.source, stmt.loc); + p.log.addRangeError(p.source, r, "Cannot use \"continue\" here") catch unreachable; + } + }, + .s_label => |data| { + p.pushScopeForVisitPass(.label, stmt.loc) catch unreachable; + const name = p.loadNameFromRef(data.name.ref orelse unreachable); + const ref = p.newSymbol(.label, name) catch unreachable; + data.name.ref = ref; + p.current_scope.label_ref = ref; + switch (data.stmt.data) { + .s_for, .s_for_in, .s_for_of, .s_while, .s_do_while => { + p.current_scope.label_stmt_is_loop = true; + }, + else => {}, + } + + data.stmt = p.visitSingleStmt(data.stmt, StmtsKind.none); + p.popScope(); + }, + .s_local => |data| { + for (data.decls) |*d| { + p.visitBinding(d.binding, null); + + if (d.value != null) { + var val = d.value orelse unreachable; + const was_anonymous_named_expr = p.isAnonymousNamedExpr(val); + + val = p.visitExpr(val); + // go version of defer would cause this to reset the variable + // zig version of defer causes this to set it to the last value of val, at the end of the scope. + defer d.value = val; + + // Optionally preserve the name + switch (d.binding.data) { + .b_identifier => |id| { + val = p.maybeKeepExprSymbolName( + val, + p.symbols.items[id.ref.inner_index].original_name, + was_anonymous_named_expr, + ); + }, + else => {}, + } + } + } + + // Handle being exported inside a namespace + if (data.is_export and p.enclosing_namespace_arg_ref != null) { + for (data.decls) |*d| { + if (d.value) |val| { + p.recordUsage(&(p.enclosing_namespace_arg_ref orelse unreachable)); + // TODO: is it necessary to lowerAssign? why does esbuild do it _most_ of the time? + stmts.append(p.s(S.SExpr{ + .value = Expr.assign(Binding.toExpr(&d.binding, p.to_expr_wrapper_namespace), val, p.allocator), + }, stmt.loc)) catch unreachable; + } + } + + return; + } + + // TODO: do we need to relocate vars? I don't think so. 
+ if (data.kind == .k_var) {} + }, + .s_expr => |data| { + p.stmt_expr_value = data.value.data; + data.value = p.visitExpr(data.value); + + // TODO: + // if (p.options.mangle_syntax) { + + // } + }, + .s_throw => |data| { + data.value = p.visitExpr(data.value); + }, + .s_return => |data| { + if (p.fn_or_arrow_data_visit.is_outside_fn_or_arrow) { + const where = where: { + if (p.es6_export_keyword.len > 0) { + break :where p.es6_export_keyword; + } else if (p.top_level_await_keyword.len > 0) { + break :where p.top_level_await_keyword; + } else { + break :where logger.Range.None; + } + }; + + if (where.len > 0) { + p.log.addRangeError(p.source, where, "Top-level return cannot be used inside an ECMAScript module") catch unreachable; + } + } + + if (data.value) |val| { + data.value = p.visitExpr(val); + + // "return undefined;" can safely just always be "return;" + if (@as(Expr.Tag, data.value.?.data) == .e_undefined) { + // Returning undefined is implicit + data.value = null; + } + } + }, + .s_block => |data| { + { + p.pushScopeForVisitPass(.block, stmt.loc) catch unreachable; + defer p.popScope(); + + // Pass the "is loop body" status on to the direct children of a block used + // as a loop body. This is used to enable optimizations specific to the + // topmost scope in a loop body block. 
+ const kind = if (std.meta.eql(p.loop_body, stmt.data)) StmtsKind.loop_body else StmtsKind.none; + var _stmts = List(Stmt).init(p.allocator); + p.visitStmts(&_stmts, kind) catch unreachable; + data.stmts = _stmts.toOwnedSlice(); + } + + // trim empty statements + if (data.stmts.len == 0) { + stmts.append(p.s(S.Empty{}, stmt.loc)) catch unreachable; + return; + } else if (data.stmts.len == 1 and !statementCaresAboutScope(data.stmts[0])) { + // Unwrap blocks containing a single statement + stmts.append(data.stmts[0]) catch unreachable; + return; + } + }, + .s_with => |data| { + notimpl(); + }, + .s_while => |data| { + data.test_ = p.visitExpr(data.test_); + data.body = p.visitLoopBody(data.body); + + // TODO: simplify boolean expression + }, + .s_do_while => |data| { + data.test_ = p.visitExpr(data.test_); + data.body = p.visitLoopBody(data.body); + + // TODO: simplify boolean expression + }, + .s_if => |data| { + data.test_ = p.visitExpr(data.test_); + + // TODO: Fold constants + + }, + .s_for => |data| { + { + p.pushScopeForVisitPass(.block, stmt.loc) catch unreachable; + defer p.popScope(); + if (data.init) |initst| { + _ = p.visitForLoopInit(initst, false); + } + + if (data.test_) |test_| { + data.test_ = p.visitExpr(test_); + + // TODO: boolean with side effects + } + + if (data.update) |update| { + data.update = p.visitExpr(update); + } + + data.body = p.visitLoopBody(data.body); + } + // TODO: Potentially relocate "var" declarations to the top level + + }, + .s_for_in => |data| { + { + p.pushScopeForVisitPass(.block, stmt.loc) catch unreachable; + defer p.popScope(); + _ = p.visitForLoopInit(data.init, true); + data.value = p.visitExpr(data.value); + data.body = p.visitLoopBody(data.body); + + // TODO: do we need to this? 
+ // // Check for a variable initializer + // if local, ok := s.Init.Data.(*js_ast.SLocal); ok && local.Kind == js_ast.LocalVar && len(local.Decls) == 1 { + // decl := &local.Decls[0] + // if id, ok := decl.Binding.Data.(*js_ast.BIdentifier); ok && decl.Value != nil { + // p.markStrictModeFeature(forInVarInit, p.source.RangeOfOperatorBefore(decl.Value.Loc, "="), "") + + // // Lower for-in variable initializers in case the output is used in strict mode + // stmts = append(stmts, js_ast.Stmt{Loc: stmt.Loc, Data: &js_ast.SExpr{Value: js_ast.Assign( + // js_ast.Expr{Loc: decl.Binding.Loc, Data: &js_ast.EIdentifier{Ref: id.Ref}}, + // *decl.Value, + // )}}) + // decl.Value = nil + // } + // } + } + }, + .s_for_of => |data| { + p.pushScopeForVisitPass(.block, stmt.loc) catch unreachable; + defer p.popScope(); + _ = p.visitForLoopInit(data.init, true); + data.value = p.visitExpr(data.value); + data.body = p.visitLoopBody(data.body); + + // TODO: do we need to do this? + // // Potentially relocate "var" declarations to the top level + // if init, ok := s.Init.Data.(*js_ast.SLocal); ok && init.Kind == js_ast.LocalVar { + // if replacement, ok := p.maybeRelocateVarsToTopLevel(init.Decls, relocateVarsForInOrForOf); ok { + // s.Init = replacement + // } + // } + + // p.lowerObjectRestInForLoopInit(s.Init, &s.Body) + }, + .s_try => |data| { + notimpl(); + }, + .s_switch => |data| { + notimpl(); + }, + .s_function => |data| { + notimpl(); + }, + .s_class => |data| { + notimpl(); + }, + .s_enum => |data| { + notimpl(); + }, + .s_namespace => |data| { + notimpl(); + }, + else => { + notimpl(); + }, } // if we get this far, it stays try stmts.append(stmt.*); } + pub fn visitForLoopInit(p: *P, stmt: Stmt, is_in_or_of: bool) Stmt { + switch (stmt.data) { + .s_expr => |st| { + const assign_target = if (is_in_or_of) js_ast.AssignTarget.replace else js_ast.AssignTarget.none; + p.stmt_expr_value = st.value.data; + st.value = p.visitExprInOut(st.value, ExprIn{ .assign_target = 
assign_target }); + }, + .s_local => |st| { + for (st.decls) |*dec| { + p.visitBinding(dec.binding, null); + if (dec.value) |val| { + dec.value = p.visitExpr(val); + } + } + // s.Decls = p.lowerObjectRestInDecls(s.Decls) + // s.Kind = p.selectLocalKind(s.Kind) + }, + else => { + p.panic("Unexpected stmt in visitForLoopInit: {s}", .{stmt}); + }, + } + + return stmt; + } + + // pub fn maybeRelocateVarsToTopLevel(p: *P, decls: []G.Decl, mode: ) + + pub fn wrapIdentifierNamespace( + p: *P, + loc: logger.Loc, + ref: Ref, + ) Expr { + p.recordUsage(&(p.enclosing_namespace_arg_ref orelse unreachable)); + + return p.e(E.Dot{ + .target = p.e(E.Identifier{ .ref = p.enclosing_namespace_arg_ref orelse unreachable }, loc), + .name = p.symbols.items[ref.inner_index].original_name, + .name_loc = loc, + }, loc); + } + + pub fn wrapIdentifierHoisting( + p: *P, + loc: logger.Loc, + ref: Ref, + ) Expr { + p.relocated_top_level_vars.append(LocRef{ .loc = loc, .ref = ref }) catch unreachable; + var _ref = ref; + p.recordUsage(&_ref); + return p.e(E.Identifier{ .ref = _ref }, loc); + } + + pub fn isAnonymousNamedExpr(p: *P, expr: ExprNodeIndex) bool { + notimpl(); + } + + pub fn visitBinding(p: *P, binding: BindingNodeIndex, duplicate_arg_check: ?StringBoolMap) void { + notimpl(); + } + + pub fn visitLoopBody(p: *P, stmt: StmtNodeIndex) StmtNodeIndex { + const old_is_inside_loop = p.fn_or_arrow_data_visit.is_inside_loop; + p.fn_or_arrow_data_visit.is_inside_loop = true; + defer p.fn_or_arrow_data_visit.is_inside_loop = old_is_inside_loop; + p.loop_body = stmt.data; + return p.visitSingleStmt(stmt, .loop_body); + } + + pub fn visitSingleStmt(p: *P, stmt: Stmt, kind: StmtsKind) Stmt { + const has_if_scope = has_if: { + switch (stmt.data) { + .s_function => |func| { + break :has_if func.func.flags.has_if_scope; + }, + else => { + break :has_if false; + }, + } + }; + + // Introduce a fake block scope for function declarations inside if statements + if (has_if_scope) { + 
p.pushScopeForVisitPass(.block, stmt.loc) catch unreachable; + } + + var stmts = List(Stmt).initCapacity(p.allocator, 1) catch unreachable; + stmts.append(stmt) catch unreachable; + p.visitStmts(&stmts, kind) catch unreachable; + + if (has_if_scope) { + p.popScope(); + } + + return p.stmtsToSingleStmt(stmt.loc, stmts.toOwnedSlice()); + } + + // One statement could potentially expand to several statements + pub fn stmtsToSingleStmt(p: *P, loc: logger.Loc, stmts: []Stmt) Stmt { + if (stmts.len == 0) { + return p.s(S.Empty{}, loc); + } + + if (stmts.len == 1) { + switch (stmts[0].data) { + .s_local => |local| { + // "let" and "const" must be put in a block when in a single-statement context + + if (local.kind == .k_var) { + return stmts[0]; + } + }, + else => { + return stmts[0]; + }, + } + } + + return p.s(S.Block{ .stmts = stmts }, loc); + } + + pub fn findLabelSymbol(p: *P, loc: logger.Loc, name: string) FindLabelSymbolResult { + var res = FindLabelSymbolResult{ .ref = undefined, .is_loop = false }; + + var _scope: ?*Scope = p.current_scope; + + while (_scope) |scope| : (_scope = scope.parent) { + var label_ref = scope.label_ref orelse continue; + + if (!scope.kindStopsHoisting() or (scope.kind != .label) or !strings.eql(name, p.symbols.items[label_ref.inner_index].original_name)) { + continue; + } + + // Track how many times we've referenced this symbol + p.recordUsage(&label_ref); + res.ref = label_ref; + res.is_loop = scope.label_stmt_is_loop; + res.found = true; + break; + } + + const r = js_lexer.rangeOfIdentifier(&p.source, loc); + p.log.addRangeErrorFmt(p.source, r, p.allocator, "There is no containing label named {s}", .{name}) catch unreachable; + + // Allocate an "unbound" symbol + var ref = p.newSymbol(.unbound, name) catch unreachable; + + // Track how many times we've referenced this symbol + p.recordUsage(&ref); + + return res; + } + + pub fn visitClass(p: *P, loc: logger.Loc, class: *G.Class) Ref { + notimpl(); + } + + fn keepStmtSymbolName(p: *P, 
loc: logger.Loc, ref: Ref, name: string) Stmt { + var exprs = p.allocator.alloc(Expr, 2) catch unreachable; + exprs[0] = p.e(E.Identifier{ + .ref = ref, + }, loc); + exprs[1] = p.e(E.String{ .value = strings.toUTF16Alloc(name, p.allocator) catch unreachable }, loc); + return p.s(S.SExpr{ + // I believe that this is a spot we can do $RefreshReg$(name) + .value = p.callRuntime(loc, "__name", exprs), + + // Make sure tree shaking removes this if the function is never used + .does_not_affect_tree_shaking = true, + }, loc); + } + + pub fn callRuntime(p: *P, loc: logger.Loc, name: string, args: []Expr) Expr { + var ref: Ref = undefined; + if (!p.runtime_imports.contains(name)) { + ref = p.newSymbol(.other, name) catch unreachable; + p.module_scope.generated.append(ref) catch unreachable; + p.runtime_imports.put(name, ref) catch unreachable; + } else { + ref = p.runtime_imports.get(name) orelse unreachable; + } + + p.recordUsage(&ref); + return p.e(E.Call{ + .target = p.e(E.Identifier{ + .ref = ref, + }, loc), + .args = args, + }, loc); + } + fn visitStmts(p: *P, stmts: *List(Stmt), kind: StmtsKind) !void { // Save the current control-flow liveness. This represents if we are // currently inside an "if (false) { ... }" block. 
@@ -6178,8 +6909,7 @@ const P = struct {
                 if (p.options.ts and p.lexer.token == .t_equals and !p.forbid_suffix_after_as_loc.eql(p.lexer.loc())) {
                     p.lexer.next();
-                    var expr = p.parseExpr(.comma);
-                    item = item.assign(&expr, p.allocator);
+                    item = Expr.assign(item, p.parseExpr(.comma), p.allocator);
                 }
                 items_list.append(item) catch unreachable;
@@ -6274,10 +7004,15 @@ const P = struct {
         parser.log = log;
         parser.allocator = allocator;
         parser.options = opts;
+        parser.to_expr_wrapper_namespace = Binding2ExprWrapper.Namespace.init(parser);
+        parser.to_expr_wrapper_hoisted = Binding2ExprWrapper.Hoisted.init(parser);
         parser.source = source;
+        parser.lexer = lexer;
         parser.data = js_ast.AstData.init(allocator);
+        _ = try parser.pushScopeForParsePass(.entry, locModuleScope);
+
         return parser;
     }
 };
@@ -6309,11 +7044,53 @@ const DeferredArrowArgErrors = struct {
     invalid_expr_yield: logger.Range = logger.Range.None,
 };

-test "js_parser.init" {
-    try alloc.setup(std.heap.page_allocator);
+const SymbolList = [][]Symbol;
+
+/// Test helper: parse `contents` under the entry point name "file.js", print
+/// the resulting AST with js_printer, and compare the printed JavaScript
+/// against `expected`. Any parser log messages are written with std.debug.print.
+fn expectPrintedJS(contents: string, expected: string) !void {
+    if (alloc.dynamic_manager == null) {
+        try alloc.setup(std.heap.page_allocator);
+    }
+
+    debugl("INIT TEST");
+
+    const opts = try options.TransformOptions.initUncached(alloc.dynamic, "file.js", contents);
+    var log = logger.Log.init(alloc.dynamic);
+    var source = logger.Source.initFile(opts.entry_point, alloc.dynamic);
+    var ast: js_ast.Ast = undefined;
+
+    debugl("INIT PARSER");
+
+    var parser = try Parser.init(opts, &log, &source, alloc.dynamic);
+    debugl("RUN PARSER");
+
+    var res = try parser.parse();
+    ast = res.ast;
+    var symbols: SymbolList = &([_][]Symbol{ast.symbols});
+    var symbol_map = js_ast.Symbol.Map.initList(symbols);
+
+    if (log.msgs.items.len > 0) {
+        debugl("PRINT LOG ERRORS");
+        var fixedBuffer = [_]u8{0} ** 4096;
+        var stream = std.io.fixedBufferStream(&fixedBuffer);
+
+        try log.print(stream.writer());
+        // Print only the bytes the log wrote, not the unused zero-filled tail.
+        std.debug.print("{s}", .{stream.getWritten()});
+    }
+    var linker = @import("linker.zig").Linker{};
+    debugl("START AST PRINT");
+    const result = try js_printer.printAst(alloc.dynamic, ast, symbol_map, true, js_printer.Options{ .to_module_ref = res.ast.module_ref orelse Ref{ .inner_index = 0 } }, &linker);
+
+    // Compare against the caller's expectation; comparing against `contents` left `expected` unused.
+    std.testing.expectEqualStrings(expected, result.js);
+}

-    const entryPointName = "/bacon/hello.js";
-    const code = "for (let i = 0; i < 100; i++) { console.log(\"hi\");\n}";
-    var parser = try Parser.init(try options.TransformOptions.initUncached(alloc.dynamic, entryPointName, code), alloc.dynamic);
-    const res = try parser.parse();
+test "expectPrint" {
+    try expectPrintedJS(
+        "const bacon = true; function hello() { return 100; }; hello();",
+        "const bacon = true; function hello() { return 100; }; hello();",
+    );
 }
diff --git a/src/js_printer.zig b/src/js_printer.zig
index 71ca10402..beed8162d 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -302,6 +302,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
             p.print("}");
         }
         pub fn printDecls(p: *Printer, keyword: string, decls: []G.Decl, flags: ExprFlag) void {
+            debug("<printDecls>\n {s}", .{decls});
+            defer debug("</printDecls>", .{});
             p.print(keyword);
             p.printSpace();
@@ -330,6 +332,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
         pub fn addSourceMapping(p: *Printer, loc: logger.Loc) void {}
         pub fn printSymbol(p: *Printer, ref: Ref) void {
+            debug("<printSymbol>\n {s}", .{ref});
+            defer debugl("</printSymbol>");
             const name = p.renamer.nameForSymbol(ref);
             p.printIdentifier(name);
@@ -781,6 +785,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
         pub fn printExpr(p: *Printer, expr: Expr, level: Level, _flags: ExprFlag) void {
             p.addSourceMapping(expr.loc);
             var flags = _flags;
+            debugl("<printExpr>");
+            defer debugl("</printExpr>");
             switch (expr.data) {
                 .e_missing => |e| {},
@@ -1106,6 +1112,7 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
                 .e_function => |e| {
                     const n = p.js.lenI();
                     var wrap = p.stmt_start == n or
p.export_default_start == n; + if (wrap) { p.print("("); } @@ -1123,6 +1130,7 @@ pub fn NewPrinter(comptime ascii_only: bool) type { if (e.func.name) |sym| { p.printSymbol(sym.ref orelse std.debug.panic("internal error: expected E.Function's name symbol to have a ref\n{s}", .{e.func})); } + p.printFunc(e.func); if (wrap) { p.print(")"); @@ -1592,6 +1600,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type { } pub fn printProperty(p: *Printer, item: G.Property) void { + debugl("<printProperty>"); + defer debugl("</printProperty>"); if (item.kind == .spread) { p.print("..."); p.printExpr(item.value.?, .comma, ExprFlag.None()); @@ -1748,6 +1758,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type { } pub fn printBinding(p: *Printer, binding: Binding) void { + debug("<printBinding>\n {s}", .{binding}); + defer debugl("</printBinding>"); p.addSourceMapping(binding.loc); switch (binding.data) { @@ -1903,6 +1915,8 @@ pub fn NewPrinter(comptime ascii_only: bool) type { } pub fn printStmt(p: *Printer, stmt: Stmt) !void { + debug("<printStmt>: {s}\n", .{stmt}); + defer debug("</printStmt>: {s}\n", .{stmt}); p.comptime_flush(); p.addSourceMapping(stmt.loc); @@ -2682,9 +2696,7 @@ pub fn NewPrinter(comptime ascii_only: bool) type { .js = js, .writer = js.writer(), .linker = linker, - .renamer = rename.Renamer{ - .symbols = symbols, - }, + .renamer = rename.Renamer.init(symbols), }; } }; diff --git a/src/json_parser.zig b/src/json_parser.zig index 8b371d4e2..9c0257899 100644 --- a/src/json_parser.zig +++ b/src/json_parser.zig @@ -36,9 +36,9 @@ const Level = js_ast.Op.Level; const Op = js_ast.Op; const Scope = js_ast.Scope; const locModuleScope = logger.Loc.Empty; +const Lexer = js_lexer.Lexer; fn JSONLikeParser(opts: js_lexer.JSONOptions) type { - const Lexer = if (opts.allow_comments) js_lexer.TSConfigJSONLexer else js_lexer.JSONLexer; return struct { lexer: Lexer, source: *logger.Source, @@ -46,12 +46,21 @@ fn JSONLikeParser(opts: js_lexer.JSONOptions) type { allocator: 
*std.mem.Allocator, pub fn init(allocator: *std.mem.Allocator, source: *logger.Source, log: *logger.Log) !Parser { - return Parser{ - .lexer = try Lexer.init(log, source, allocator), - .allocator = allocator, - .log = log, - .source = source, - }; + if (opts.allow_comments) { + return Parser{ + .lexer = try Lexer.initTSConfig(log, source, allocator), + .allocator = allocator, + .log = log, + .source = source, + }; + } else { + return Parser{ + .lexer = try Lexer.initJSON(log, source, allocator), + .allocator = allocator, + .log = log, + .source = source, + }; + } } const Parser = @This(); @@ -217,6 +226,7 @@ const duplicateKeyJson = "{ \"name\": \"valid\", \"name\": \"invalid\" }"; const js_printer = @import("js_printer.zig"); const renamer = @import("renamer.zig"); +const SymbolList = [][]Symbol; fn expectPrintedJSON(_contents: string, expected: string) void { if (alloc.dynamic_manager == null) { @@ -240,7 +250,8 @@ fn expectPrintedJSON(_contents: string, expected: string) void { .stmts = &([_]Stmt{stmt}), }; const tree = js_ast.Ast.initTest(&([_]js_ast.Part{part})); - var symbol_map = Symbol.Map{}; + var symbols: SymbolList = &([_][]Symbol{tree.symbols}); + var symbol_map = js_ast.Symbol.Map.initList(symbols); if (log.msgs.items.len > 0) { std.debug.panic("--FAIL--\nExpr {s}\nLog: {s}\n--FAIL--", .{ expr, log.msgs.items[0].data.text }); } diff --git a/src/logger.zig b/src/logger.zig index a914f4741..e8ced7156 100644 --- a/src/logger.zig +++ b/src/logger.zig @@ -53,6 +53,7 @@ pub const Location = struct { length: usize = 0, // in bytes line_text: ?string = null, suggestion: ?string = null, + offset: usize = 0, pub fn init(file: []u8, namespace: []u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location { return Location{ @@ -63,6 +64,7 @@ pub const Location = struct { .length = length, .line_text = line_text, .suggestion = suggestion, + .offset = length, }; } @@ -76,6 +78,7 @@ pub const Location = struct { .column = 
usize2Loc(data.column_count).start, .length = source.contents.len, .line_text = source.contents[data.line_start..data.line_end], + .offset = @intCast(usize, std.math.max(r.loc.start, 0)), }; } else { return null; @@ -104,11 +107,27 @@ pub const Msg = struct { data: Data, notes: ?[]Data = null, pub fn doFormat(msg: *const Msg, to: anytype, formatterFunc: @TypeOf(std.fmt.format)) !void { - try formatterFunc(to, "\n\n{s}: {s}\n{s}\n{s}:{}:{}", .{ msg.kind.string(), msg.data.text, msg.data.location.?.line_text, msg.data.location.?.file, msg.data.location.?.line, msg.data.location.?.column }); + try formatterFunc(to, "\n\n{s}: {s}\n{s}\n{s}:{}:{} {d}", .{ + msg.kind.string(), + msg.data.text, + msg.data.location.?.line_text, + msg.data.location.?.file, + msg.data.location.?.line, + msg.data.location.?.column, + msg.data.location.?.offset, + }); } pub fn formatNoWriter(msg: *const Msg, comptime formatterFunc: @TypeOf(std.debug.panic)) void { - formatterFunc("\n\n{s}: {s}\n{s}\n{s}:{}:{}", .{ msg.kind.string(), msg.data.text, msg.data.location.?.line_text, msg.data.location.?.file, msg.data.location.?.line, msg.data.location.?.column }); + formatterFunc("\n\n{s}: {s}\n{s}\n{s}:{}:{} ({d})", .{ + msg.kind.string(), + msg.data.text, + msg.data.location.?.line_text, + msg.data.location.?.file, + msg.data.location.?.line, + msg.data.location.?.column, + msg.data.location.?.offset, + }); } }; @@ -270,7 +289,12 @@ pub const Source = struct { // symbol for an "export default" statement will be called "util_default". 
identifier_name: string, - pub const ErrorPosition = struct { line_start: usize, line_end: usize, column_count: usize, line_count: usize }; + pub const ErrorPosition = struct { + line_start: usize, + line_end: usize, + column_count: usize, + line_count: usize, + }; pub fn initFile(file: fs.File, allocator: *std.mem.Allocator) Source { var name = file.path.name; diff --git a/src/main.zig b/src/main.zig index 488cb903f..0b8437253 100644 --- a/src/main.zig +++ b/src/main.zig @@ -39,6 +39,7 @@ pub fn main() anyerror!void { .value = js_ast.StmtOrExpr{ .expr = expr }, .default_name = js_ast.LocRef{ .loc = logger.Loc{}, .ref = Ref{} }, }, logger.Loc{ .start = 0 }); + var part = js_ast.Part{ .stmts = &([_]js_ast.Stmt{stmt}), }; @@ -56,10 +57,11 @@ pub fn main() anyerror!void { } var _linker = linker.Linker{}; + var symbols: [][]js_ast.Symbol = &([_][]js_ast.Symbol{ast.symbols}); const printed = try js_printer.printAst( alloc.dynamic, ast, - js_ast.Symbol.Map{}, + js_ast.Symbol.Map.initList(symbols), false, js_printer.Options{ .to_module_ref = js_ast.Ref{ .inner_index = 0 } }, &_linker, diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 116c17c00..d0dffd49e 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -237,3 +237,38 @@ pub fn containsNonBmpCodePointUTF16(_text: JavascriptString) bool { return false; } + +/// Super simple "perfect hash" algorithm +/// Only really useful for switching on strings +// TODO: can we auto detect and promote the underlying type? 
+pub fn ExactSizeMatcher(comptime max_bytes: usize) type {
+    const T = std.meta.Int(.unsigned, max_bytes * 8);
+
+    return struct {
+        /// Integer key for `str`; a string longer than `max_bytes` yields maxInt(T).
+        pub fn match(str: anytype) T {
+            return hash(str) orelse std.math.maxInt(T);
+        }
+
+        pub fn case(comptime str: []const u8) T {
+            return hash(str) orelse std.math.maxInt(T);
+        }
+
+        fn hash(str: anytype) ?T {
+            // Too long to pack into T: report "no key" rather than overrun `tmp` in the copy below.
+            if (str.len > max_bytes) return null;
+            var tmp = [_]u8{0} ** max_bytes;
+            std.mem.copy(u8, &tmp, str);
+            return std.mem.readIntNative(T, &tmp);
+        }
+    };
+}
+
+const eight = ExactSizeMatcher(8);
+
+test "ExactSizeMatcher" {
+    const word = "yield";
+    expect(eight.match(word) == eight.case("yield"));
+    expect(eight.match(word) != eight.case("yields"));
+    expect(eight.match("yield_expression") == std.math.maxInt(u64));
+}
|