From cabe773a4f0a12e411f9f3c9698da6bbd90ec474 Mon Sep 17 00:00:00 2001
From: Jarred Sumner
Date: Wed, 28 Apr 2021 21:58:02 -0700
Subject: wip

Former-commit-id: b37acf309c8f42d49dc47eea446f89a3dbe9f6e2
---
 src/ast/base.zig           |   10 +-
 src/defines.zig            |  155 ++-
 src/global_name_parser.zig |    0
 src/js_ast.zig             |   93 +-
 src/js_lexer.zig           | 2370 +++++++++++++++++++++++---------------------
 src/js_lexer_tables.zig    |   26 +-
 src/js_parser.zig          | 1314 +++++++++++++++++++-----
 src/js_printer.zig         |   18 +-
 src/json_parser.zig        |   27 +-
 src/logger.zig             |   30 +-
 src/main.zig               |    4 +-
 src/string_immutable.zig   |   35 +
 12 files changed, 2615 insertions(+), 1467 deletions(-)
 create mode 100644 src/global_name_parser.zig

diff --git a/src/ast/base.zig b/src/ast/base.zig
index f941e3745..1b2e3f087 100644
--- a/src/ast/base.zig
+++ b/src/ast/base.zig
@@ -27,7 +27,7 @@ pub const Ref = packed struct {
     inner_index: Int = 0,
 
     // 2 bits of padding for whatever is the parent
-    pub const Int = u31;
+    pub const Int = u30;
     pub const None = Ref{ .inner_index = std.math.maxInt(Ref.Int) };
     pub fn isNull(self: *const Ref) bool {
         return self.source_index == std.math.maxInt(Ref.Int) and self.inner_index == std.math.maxInt(Ref.Int);
@@ -55,3 +55,11 @@ pub const RequireOrImportMeta = struct {
     exports_ref: Ref = Ref.None,
     is_wrapper_async: bool = false,
 };
+pub fn debug(comptime fmt: []const u8, args: anytype) callconv(.Inline) void {
+    // std.debug.print(fmt, args);
+}
+pub fn debugl(
+    comptime fmt: []const u8,
+) callconv(.Inline) void {
+    // std.debug.print("{s}\n", .{fmt});
+}
diff --git a/src/defines.zig b/src/defines.zig
index 6021df57f..a27956839 100644
--- a/src/defines.zig
+++ b/src/defines.zig
@@ -1,12 +1,159 @@
 const std = @import("std");
 const js_ast = @import("./js_ast.zig");
+const alloc = @import("alloc.zig");
+
+usingnamespace @import("strings.zig");
 
 const GlobalDefinesKey = @import("./defines-table.zig").GlobalDefinesKey;
 
-pub const defaultIdentifierDefines = comptime {};
+const Globals = struct {
+    pub const Undefined = js_ast.E.Undefined{};
+    pub const UndefinedPtr = &Globals.Undefined;
+
+    pub const NaN = js_ast.E.Number{ .value = std.math.nan(f64) };
+    pub const NanPtr = &Globals.NaN;
+
+    pub const Infinity = js_ast.E.Number{ .value = std.math.inf(f64) };
+    pub const InfinityPtr = &Globals.Infinity;
+};
+
+pub const DefineData = struct {
+    value: js_ast.Expr.Data = DefaultValue,
+
+    // True if accessing this value is known to not have any side effects. For
+    // example, a bare reference to "Object.create" can be removed because it
+    // does not have any observable side effects.
+    can_be_removed_if_unused: bool = false,
+
+    // True if a call to this value is known to not have any side effects. For
+    // example, a bare call to "Object()" can be removed because it does not
+    // have any observable side effects.
+    call_can_be_unwrapped_if_unused: bool = false,
+
+    pub const DefaultValue = js_ast.Expr.Data{ .e_undefined = Globals.UndefinedPtr };
+
+    // All the globals have the same behavior.
+    // So we can create just one struct for it.
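    // A hypothetical lookup to illustrate the idea (a sketch, not exercised by
    // this change; it assumes "Object" is one of the single-part
    // GlobalDefinesKey entries):
    //
    //   const data = define.identifiers.get("Object") orelse DefineData.GlobalDefineData;
    //
    // Every global resolves to this one shared value, so no per-global state
    // has to be allocated.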
+    pub const GlobalDefineData = DefineData{};
+
+    pub fn merge(a: DefineData, b: DefineData) DefineData {
+        return DefineData{
+            .value = b.value,
+            .can_be_removed_if_unused = a.can_be_removed_if_unused,
+            .call_can_be_unwrapped_if_unused = a.call_can_be_unwrapped_if_unused,
+        };
+    }
+};
+
+fn arePartsEqual(a: []string, b: []string) bool {
+    if (a.len != b.len) {
+        return false;
+    }
+
+    var i: usize = 0;
+    while (i < a.len) : (i += 1) {
+        if (!strings.eql(a[i], b[i])) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+pub const IdentifierDefine = DefineData;
+
+pub const DotDefine = struct {
+    parts: []string,
+    data: DefineData,
+};
+
+pub const Define = struct {
+    identifiers: std.StringHashMapUnmanaged(IdentifierDefine),
+    dots: std.StringHashMapUnmanaged([]DotDefine),
+    allocator: *std.mem.Allocator,
+
+    pub fn init(allocator: *std.mem.Allocator, user_defines: std.StringHashMap(DefineData)) !*@This() {
+        var define = try allocator.create(Define);
+        define.allocator = allocator;
+        // The maps are undefined memory after create(), so initialize them
+        // before asking for capacity.
+        define.identifiers = std.StringHashMapUnmanaged(IdentifierDefine){};
+        define.dots = std.StringHashMapUnmanaged([]DotDefine){};
+        try define.identifiers.ensureCapacity(allocator, 641);
+        try define.dots.ensureCapacity(allocator, 38);
+
+        // Step 1. Load the globals into the hash tables
+        for (GlobalDefinesKey) |global| {
+            if (global.len == 1) {
+                // TODO: when https://github.com/ziglang/zig/pull/8596 is merged, switch to putAssumeCapacityNoClobber
+                define.identifiers.putAssumeCapacity(global[0], IdentifierDefine.GlobalDefineData);
+            } else {
+                // TODO: when https://github.com/ziglang/zig/pull/8596 is merged, switch to putAssumeCapacityNoClobber
+                // The map stores a slice of DotDefines per tail, so wrap the
+                // global in a single-element slice.
+                var global_define = try allocator.alloc(DotDefine, 1);
+                global_define[0] = DotDefine{
+                    .parts = global[0 .. global.len - 1],
+                    .data = DefineData.GlobalDefineData,
+                };
+                define.dots.putAssumeCapacity(global[global.len - 1], global_define);
+            }
+        }
+
+        // Step 2. Swap in certain literal values because those can be constant folded
+        define.identifiers.putAssumeCapacity("undefined", DefineData{
+            .value = js_ast.Expr.Data{ .e_undefined = Globals.UndefinedPtr },
+        });
+        define.identifiers.putAssumeCapacity("NaN", DefineData{
+            .value = js_ast.Expr.Data{ .e_number = Globals.NanPtr },
+        });
+        define.identifiers.putAssumeCapacity("Infinity", DefineData{
+            .value = js_ast.Expr.Data{ .e_number = Globals.InfinityPtr },
+        });
+
+        // Step 3. Load user data into hash tables
+        // At this stage, user data has already been validated.
+        if (user_defines.count() > 0) {
+            var iter = user_defines.iterator();
+            while (iter.next()) |user_define| {
+                // If it has a dot, then it's a DotDefine.
+                // e.g. process.env.NODE_ENV
+                if (strings.lastIndexOfChar(user_define.key, '.')) |last_dot| {
+                    const tail = user_define.key[last_dot + 1 .. user_define.key.len];
+                    // Collect the leading segments (e.g. "process", "env") as parts.
+                    var parts_list = std.ArrayList(string).init(allocator);
+                    var tokenizer = std.mem.tokenize(user_define.key[0..last_dot], ".");
+                    while (tokenizer.next()) |part| {
+                        try parts_list.append(part);
+                    }
+                    const parts = parts_list.toOwnedSlice();
+                    var didFind = false;
+                    var no_initial_values = [_]DotDefine{};
+                    var initial_values: []DotDefine = &no_initial_values;
+
+                    // "NODE_ENV"
+                    if (define.dots.getEntry(tail)) |entry| {
+                        for (entry.value) |*part| {
+                            // ["process", "env"] == ["process", "env"]
+                            if (arePartsEqual(part.parts, parts)) {
+                                part.data = part.data.merge(user_define.value);
+                                didFind = true;
+                                break;
+                            }
+                        }
+
+                        initial_values = entry.value;
+                    }
+
+                    if (!didFind) {
+                        var list = try std.ArrayList(DotDefine).initCapacity(allocator, initial_values.len + 1);
+                        if (initial_values.len > 0) {
+                            list.appendSliceAssumeCapacity(initial_values);
+                        }
-pub const IdentifierDefine = struct {};
+                        list.appendAssumeCapacity(DotDefine{
+                            .data = user_define.value,
+                            // TODO: do we need to allocate this?
+                            .parts = parts,
+                        });
+                        try define.dots.put(allocator, tail, list.toOwnedSlice());
+                    }
+                } else {
+                    // IS_BROWSER
+                    try define.identifiers.put(allocator, user_define.key, user_define.value);
+                }
+            }
+        }
-pub const DotDefine = struct {};
+        return define;
+    }
+};
-pub const Defines = struct {};
+test "defines" {
+
+}
diff --git a/src/global_name_parser.zig b/src/global_name_parser.zig
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/js_ast.zig b/src/js_ast.zig
index 9602f42ab..4701020fd 100644
--- a/src/js_ast.zig
+++ b/src/js_ast.zig
@@ -93,6 +93,73 @@ pub const Binding = struct {
     loc: logger.Loc,
     data: B,
 
+    pub fn ToExpr(comptime expr_type: type, comptime func_type: anytype) type {
+        const ExprType = expr_type;
+        return struct {
+            context: *ExprType,
+            allocator: *std.mem.Allocator,
+            pub const Context = @This();
+
+            pub fn wrapIdentifier(ctx: *const Context, loc: logger.Loc, ref: Ref) Expr {
+                return func_type(ctx.context, loc, ref);
+            }
+
+            pub fn init(context: *ExprType) Context {
+                return Context{ .context = context, .allocator = context.allocator };
+            }
+        };
+    }
+
+    pub fn toExpr(binding: *const Binding, wrapper: anytype) Expr {
+        var loc = binding.loc;
+
+        switch (binding.data) {
+            .b_missing => {
+                return Expr.alloc(wrapper.allocator, E.Missing{}, loc);
+            },
+
+            .b_identifier => |b| {
+                return wrapper.wrapIdentifier(loc, b.ref);
+            },
+            .b_array => |b| {
+                var exprs = wrapper.allocator.alloc(Expr, b.items.len) catch unreachable;
+                var i: usize = 0;
+                while (i < exprs.len) : (i += 1) {
+                    const item = b.items[i];
+                    exprs[i] = convert: {
+                        const expr = toExpr(&item.binding, wrapper);
+                        if (b.has_spread and i == exprs.len - 1) {
+                            break :convert Expr.alloc(wrapper.allocator, E.Spread{ .value = expr }, expr.loc);
+                        } else if (item.default_value) |default| {
+                            break :convert Expr.assign(expr, default, wrapper.allocator);
+                        } else {
+                            break :convert expr;
+                        }
+                    };
+                }
+
+                return Expr.alloc(wrapper.allocator, E.Array{ .items = exprs, .is_single_line = b.is_single_line }, loc);
+            },
+            .b_object => |b| {
+                var properties = wrapper.allocator.alloc(G.Property, b.properties.len) catch unreachable;
+                var i: usize = 0;
+                while (i < properties.len) : (i += 1) {
+                    const item = b.properties[i];
+                    properties[i] = G.Property{
+                        .flags = item.flags,
+                        .kind = if (item.flags.is_spread) G.Property.Kind.spread else G.Property.Kind.normal,
+                        .value = toExpr(&item.value, wrapper),
+                        .initializer = item.default_value,
+                    };
+                }
+                return Expr.alloc(wrapper.allocator, E.Object{ .properties = properties, .is_single_line = b.is_single_line }, loc);
+            },
+            else => {
+                std.debug.panic("Internal error", .{});
+            },
+        }
+    }
+
     pub const Tag = packed enum {
         b_identifier,
         b_array,
@@ -511,7 +578,7 @@ pub const Symbol = struct {
     // single inner array, so you can join the maps together by just make a
    // single outer array containing all of the inner arrays. See the comment on
    // "Ref" for more detail.
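    // A sketch of the lookup this layout gives us (it is what Map.get below
    // does):
    //
    //   const ref = Ref{ .source_index = 0, .inner_index = 3 };
    //   const sym = map.symbols_for_source[ref.source_index][ref.inner_index];
    //
    // Merging symbol tables is then just gathering each file's inner slice
    // into one outer slice; no Ref has to be rewritten.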
- symbols_for_source: [][]Symbol = undefined, + symbols_for_source: [][]Symbol, pub fn get(self: *Map, ref: Ref) ?Symbol { return self.symbols_for_source[ref.source_index][ref.inner_index]; @@ -522,6 +589,10 @@ pub const Symbol = struct { return Map{ .symbols_for_source = symbols_for_source }; } + pub fn initList(list: [][]Symbol) Map { + return Map{ .symbols_for_source = list }; + } + pub fn follow(symbols: *Map, ref: Ref) Ref { if (symbols.get(ref)) |*symbol| { const link = symbol.link orelse return ref; @@ -1974,12 +2045,11 @@ pub const Expr = struct { } }; - pub fn assign(a: *Expr, b: *Expr, allocator: *std.mem.Allocator) Expr { - std.debug.assert(a != b); + pub fn assign(a: Expr, b: Expr, allocator: *std.mem.Allocator) Expr { return alloc(allocator, E.Binary{ .op = .bin_assign, - .left = a.*, - .right = b.*, + .left = a, + .right = b, }, a.loc); } pub fn at(expr: *Expr, t: anytype, allocator: *std.mem.allocator) callconv(.Inline) Expr { @@ -2061,15 +2131,13 @@ pub const Expr = struct { return null; } - pub fn assignStmt(a: *Expr, b: *Expr, allocator: *std.mem.Allocator) Stmt { + pub fn assignStmt(a: Expr, b: Expr, allocator: *std.mem.Allocator) Stmt { return Stmt.alloc( allocator, S.SExpr{ - .op = .assign, - .left = a, - .right = b, + .value = Expr.assign(a, b, allocator), }, - loc, + a.loc, ); } @@ -2883,7 +2951,9 @@ pub const Scope = struct { } }; -pub fn printmem(comptime format: string, args: anytype) void {} +pub fn printmem(comptime format: string, args: anytype) void { + // std.debug.print(format, args); +} test "Binding.init" { var binding = Binding.alloc( @@ -3073,3 +3143,4 @@ test "Expr.init" { // Stmt | 192 // STry | 384 // -- ESBuild bit sizes + diff --git a/src/js_lexer.zig b/src/js_lexer.zig index bd018a254..a6625b3ff 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -8,8 +8,6 @@ const js_ast = @import("js_ast.zig"); usingnamespace @import("ast/base.zig"); usingnamespace @import("strings.zig"); -const _f = @import("./test/fixtures.zig"); - const unicode = std.unicode; const Source = logger.Source; @@ -21,1264 +19,1365 @@ pub const jsxEntity = tables.jsxEntity; pub const StrictModeReservedWords = tables.StrictModeReservedWords; pub const PropertyModifierKeyword = tables.PropertyModifierKeyword; pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword; +pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier; pub const JSONOptions = struct { allow_comments: bool = false, allow_trailing_commas: bool = false, }; -pub fn NewLexerType(comptime jsonOptions: ?JSONOptions) type { - return struct { - // pub const Error = error{ - // UnexpectedToken, - // EndOfFile, - // }; - - // err: ?@This().Error, - log: *logger.Log, - source: logger.Source, - current: usize = 0, - start: usize = 0, - end: usize = 0, - did_panic: bool = false, - approximate_newline_count: i32 = 0, - legacy_octal_loc: logger.Loc = logger.Loc.Empty, - previous_backslash_quote_in_jsx: logger.Range = logger.Range.None, - token: T = T.t_end_of_file, - has_newline_before: bool = false, - has_pure_comment_before: bool = false, - preserve_all_comments_before: bool = false, - is_legacy_octal_literal: bool = false, - comments_to_preserve_before: std.ArrayList(js_ast.G.Comment), - all_original_comments: ?[]js_ast.G.Comment = null, - code_point: CodePoint = -1, - string_literal: JavascriptString, - identifier: []const u8 = "", - jsx_factory_pragma_comment: ?js_ast.Span = null, - jsx_fragment_pragma_comment: ?js_ast.Span = null, - source_mapping_url: ?js_ast.Span = null, - number: f64 
= 0.0, - rescan_close_brace_as_template_token: bool = false, - for_global_name: bool = false, - prev_error_loc: logger.Loc = logger.Loc.Empty, - allocator: *std.mem.Allocator, - - pub fn loc(self: *@This()) logger.Loc { - return logger.usize2Loc(self.start); +pub const Lexer = struct { + // pub const Error = error{ + // UnexpectedToken, + // EndOfFile, + // }; + + // err: ?@This().Error, + log: *logger.Log, + json_options: ?JSONOptions = null, + for_global_name: bool = false, + source: logger.Source, + current: usize = 0, + start: usize = 0, + end: usize = 0, + did_panic: bool = false, + approximate_newline_count: i32 = 0, + legacy_octal_loc: logger.Loc = logger.Loc.Empty, + previous_backslash_quote_in_jsx: logger.Range = logger.Range.None, + token: T = T.t_end_of_file, + has_newline_before: bool = false, + has_pure_comment_before: bool = false, + preserve_all_comments_before: bool = false, + is_legacy_octal_literal: bool = false, + comments_to_preserve_before: std.ArrayList(js_ast.G.Comment), + all_original_comments: ?[]js_ast.G.Comment = null, + code_point: CodePoint = -1, + string_literal: JavascriptString, + identifier: []const u8 = "", + jsx_factory_pragma_comment: ?js_ast.Span = null, + jsx_fragment_pragma_comment: ?js_ast.Span = null, + source_mapping_url: ?js_ast.Span = null, + number: f64 = 0.0, + rescan_close_brace_as_template_token: bool = false, + prev_error_loc: logger.Loc = logger.Loc.Empty, + allocator: *std.mem.Allocator, + + pub fn loc(self: *@This()) logger.Loc { + return logger.usize2Loc(self.start); + } + + fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 { + if (it.current >= it.source.contents.len) { + return null; } - fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 { - if (it.current >= it.source.contents.len) { - return null; - } + const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable; + it.end = it.current; + it.current += cp_len; - const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable; - it.end = it.current; - it.current += cp_len; + return it.source.contents[it.current - cp_len .. it.current]; + } - return it.source.contents[it.current - cp_len .. 
it.current]; - } + pub fn syntaxError(self: *@This()) void { + self.addError(self.start, "Syntax Error!!", .{}, true); + } - pub fn syntaxError(self: *@This()) void { - self.addError(self.start, "Syntax Error!!", .{}, true); - } + pub fn addDefaultError(self: *@This(), msg: []const u8) void { + self.addError(self.start, "{s}", .{msg}, true); + } - pub fn addDefaultError(self: *@This(), msg: []const u8) void { - self.addError(self.start, "{s}", .{msg}, true); + pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void { + var __loc = logger.usize2Loc(_loc); + if (__loc.eql(self.prev_error_loc)) { + return; } - pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void { - var __loc = logger.usize2Loc(_loc); - if (__loc.eql(self.prev_error_loc)) { - return; - } + self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable; + self.prev_error_loc = __loc; + var msg = self.log.msgs.items[self.log.msgs.items.len - 1]; + msg.formatNoWriter(std.debug.panic); + } - self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable; - self.prev_error_loc = __loc; - var msg = self.log.msgs.items[self.log.msgs.items.len - 1]; - msg.formatNoWriter(std.debug.panic); + pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void { + if (self.prev_error_loc.eql(r.loc)) { + return; } - pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void { - if (self.prev_error_loc.eql(r.loc)) { - return; - } + const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable; + var msg = self.log.addRangeError(self.source, r, errorMessage); + self.prev_error_loc = r.loc; - const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable; - var msg = self.log.addRangeError(self.source, r, errorMessage); - self.prev_error_loc = r.loc; + if (panic) { + var fixedBuffer = [_]u8{0} ** 8096; + var stream = std.io.fixedBufferStream(&fixedBuffer); + const writer = stream.writer(); + self.log.print(writer) catch unreachable; - if (panic) { - self.doPanic(errorMessage); - } + std.debug.panic("{s}", .{fixedBuffer[0..stream.pos]}); } + } - fn doPanic(self: *@This(), content: []const u8) void { - if (@import("builtin").is_test) { - self.did_panic = true; - } else { - std.debug.panic("{s}", .{content}); - } + fn doPanic(self: *@This(), content: []const u8) void { + if (@import("builtin").is_test) { + self.did_panic = true; + } else { + std.debug.panic("{s}", .{content}); } + } - pub fn codePointEql(self: *@This(), a: u8) bool { - return @intCast(CodePoint, a) == self.code_point; - } + pub fn codePointEql(self: *@This(), a: u8) bool { + return @intCast(CodePoint, a) == self.code_point; + } - fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint { - const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1); + fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint { + const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1); - switch (slice.len) { - 1 => return @as(CodePoint, slice[0]), - 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable), - 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable), - 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable), - else => unreachable, - } + switch (slice.len) { + 1 => return @as(CodePoint, slice[0]), 
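            // The remaining arms follow UTF-8's leading-byte length encoding:
            // 2-byte sequences cover U+0080..U+07FF, 3-byte sequences
            // U+0800..U+FFFF, and 4-byte sequences U+10000..U+10FFFF.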
+ 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable), + 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable), + 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable), + else => unreachable, } + } - /// Look ahead at the next n codepoints without advancing the iterator. - /// If fewer than n codepoints are available, then return the remainder of the string. - fn peek(it: *@This(), n: usize) []const u8 { - const original_i = it.current; - defer it.current = original_i; - - var end_ix = original_i; - var found: usize = 0; - while (found < n) : (found += 1) { - const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..]; - end_ix += next_codepoint.len; - } - - return it.source.contents[original_i..end_ix]; + /// Look ahead at the next n codepoints without advancing the iterator. + /// If fewer than n codepoints are available, then return the remainder of the string. + fn peek(it: *@This(), n: usize) []const u8 { + const original_i = it.current; + defer it.current = original_i; + + var end_ix = original_i; + var found: usize = 0; + while (found < n) : (found += 1) { + const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..]; + end_ix += next_codepoint.len; } - pub fn isIdentifierOrKeyword(lexer: @This()) bool { - return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier); - } + return it.source.contents[original_i..end_ix]; + } - fn parseStringLiteral(lexer: *@This()) void { - var quote: CodePoint = lexer.code_point; - var needs_slow_path = false; - var suffixLen: usize = 1; + pub fn isIdentifierOrKeyword(lexer: @This()) bool { + return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier); + } - if (quote != '`') { - lexer.token = T.t_string_literal; - } else if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_tail; - } else { - lexer.token = T.t_no_substitution_template_literal; - } - lexer.step(); + fn parseStringLiteral(lexer: *@This()) void { + var quote: CodePoint = lexer.code_point; + var needs_slow_path = false; + var suffixLen: usize = 1; - stringLiteral: while (true) { - switch (lexer.code_point) { - '\\' => { - needs_slow_path = true; - lexer.step(); + if (quote != '`') { + lexer.token = T.t_string_literal; + } else if (lexer.rescan_close_brace_as_template_token) { + lexer.token = T.t_template_tail; + } else { + lexer.token = T.t_no_substitution_template_literal; + } + lexer.step(); + + stringLiteral: while (true) { + switch (lexer.code_point) { + '\\' => { + needs_slow_path = true; + lexer.step(); - // Handle Windows CRLF - if (lexer.code_point == '\r' and jsonOptions != null) { + // Handle Windows CRLF + if (lexer.code_point == '\r' and lexer.json_options != null) { + lexer.step(); + if (lexer.code_point == '\n') { lexer.step(); - if (lexer.code_point == '\n') { - lexer.step(); - } - continue :stringLiteral; } - }, - // This indicates the end of the file + continue :stringLiteral; + } + }, + // This indicates the end of the file - -1 => { - lexer.addDefaultError("Unterminated string literal"); - }, + -1 => { + lexer.addDefaultError("Unterminated string literal"); + }, - '\r' => { - if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); - } + '\r' => { + if (quote != '`') { + lexer.addDefaultError("Unterminated string literal"); + } - // Template literals require newline normalization - needs_slow_path = true; - }, + // Template literals require newline normalization + needs_slow_path = true; + }, - '\n' 
=> { - if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); - } - }, + '\n' => { + if (quote != '`') { + lexer.addDefaultError("Unterminated string literal"); + } + }, - '$' => { - if (quote == '`') { + '$' => { + if (quote == '`') { + lexer.step(); + if (lexer.code_point == '{') { + suffixLen = 2; lexer.step(); - if (lexer.code_point == '{') { - suffixLen = 2; - lexer.step(); - if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_middle; - } else { - lexer.token = T.t_template_head; - } - break :stringLiteral; + if (lexer.rescan_close_brace_as_template_token) { + lexer.token = T.t_template_middle; + } else { + lexer.token = T.t_template_head; } - continue :stringLiteral; - } - }, - - else => { - if (quote == lexer.code_point) { - lexer.step(); break :stringLiteral; } - // Non-ASCII strings need the slow path - if (lexer.code_point >= 0x80) { - needs_slow_path = true; - } else if (jsonOptions != null and lexer.code_point < 0x20) { - lexer.syntaxError(); - } - }, - } - lexer.step(); - } + continue :stringLiteral; + } + }, - const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen]; - if (needs_slow_path) { - lexer.string_literal = lexer.stringToUTF16(text); - } else { - lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable; - var i: usize = 0; - for (text) |byte| { - lexer.string_literal[i] = byte; - i += 1; - } + else => { + if (quote == lexer.code_point) { + lexer.step(); + break :stringLiteral; + } + // Non-ASCII strings need the slow path + if (lexer.code_point >= 0x80) { + needs_slow_path = true; + } else if (lexer.json_options != null and lexer.code_point < 0x20) { + lexer.syntaxError(); + } + }, } + lexer.step(); + } - if (quote == '\'' and jsonOptions != null) { - lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); + const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen]; + if (needs_slow_path) { + lexer.string_literal = lexer.stringToUTF16(text); + } else { + lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable; + var i: usize = 0; + for (text) |byte| { + lexer.string_literal[i] = byte; + i += 1; } - // for (text) - // // if (needs_slow_path) { - // // // Slow path - - // // // lexer.string_literal = lexer.(lexer.start + 1, text); - // // } else { - // // // Fast path - - // // } } - fn step(lexer: *@This()) void { - lexer.code_point = lexer.nextCodepoint(); - - // Track the approximate number of newlines in the file so we can preallocate - // the line offset table in the printer for source maps. The line offset table - // is the #1 highest allocation in the heap profile, so this is worth doing. - // This count is approximate because it handles "\n" and "\r\n" (the common - // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless - // because it's only a preallocation. The array will just grow if it's too small. 
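        // (As a rough illustration only: the printer could size its table with
        // something like
        //   try line_offsets.ensureCapacity(@intCast(usize, lexer.approximate_newline_count) + 1);
        // and only rarely pay for growth; the actual call site lives in the
        // printer, not here.)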
- if (lexer.code_point == '\n') { - lexer.approximate_newline_count += 1; - } + if (quote == '\'' and lexer.json_options != null) { + lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); } + // for (text) + // // if (needs_slow_path) { + // // // Slow path - pub fn expect(self: *@This(), comptime token: T) void { - if (self.token != token) { - self.expected(token); - } + // // // lexer.string_literal = lexer.(lexer.start + 1, text); + // // } else { + // // // Fast path - self.next(); - } + // // } + } - pub fn expectOrInsertSemicolon(lexer: *@This()) void { - if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and - lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file)) - { - lexer.expect(T.t_semicolon); - } + fn step(lexer: *@This()) void { + lexer.code_point = lexer.nextCodepoint(); + + // Track the approximate number of newlines in the file so we can preallocate + // the line offset table in the printer for source maps. The line offset table + // is the #1 highest allocation in the heap profile, so this is worth doing. + // This count is approximate because it handles "\n" and "\r\n" (the common + // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless + // because it's only a preallocation. The array will just grow if it's too small. + if (lexer.code_point == '\n') { + lexer.approximate_newline_count += 1; } + } - pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void { - self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true); + pub fn expect(self: *@This(), comptime token: T) void { + if (self.token != token) { + self.expected(token); } - pub fn scanIdentifierWithEscapes(self: *@This()) void { - self.addUnsupportedSyntaxError("escape sequence"); - return; + self.next(); + } + + pub fn expectOrInsertSemicolon(lexer: *@This()) void { + if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and + lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file)) + { + lexer.expect(T.t_semicolon); } + } + + pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void { + self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true); + } + + pub fn scanIdentifierWithEscapes(self: *@This()) void { + self.addUnsupportedSyntaxError("escape sequence"); + return; + } - pub fn debugInfo(self: *@This()) void { - if (self.log.errors > 0) { - const stderr = std.io.getStdErr().writer(); - self.log.print(stderr) catch unreachable; + pub fn debugInfo(self: *@This()) void { + if (self.log.errors > 0) { + const stderr = std.io.getStdErr().writer(); + self.log.print(stderr) catch unreachable; + } else { + if (self.token == T.t_identifier or self.token == T.t_string_literal) { + std.debug.print(" {s} ", .{self.raw()}); } else { - if (self.token == T.t_identifier or self.token == T.t_string_literal) { - std.debug.print(" {s} ", .{self.raw()}); - } else { - std.debug.print(" <{s}> ", .{tokenToString.get(self.token)}); - } + std.debug.print(" <{s}> ", .{tokenToString.get(self.token)}); } } + } - pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void { - if (!self.isContextualKeyword(keyword)) { - self.addError(self.start, "\"{s}\"", .{keyword}, true); - } - self.next(); + pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void { + if (!self.isContextualKeyword(keyword)) { + self.addError(self.start, "\"{s}\"", .{keyword}, true); } + self.next(); + } - pub fn next(lexer: *@This()) void { - lexer.has_newline_before = lexer.end == 0; + pub 
fn next(lexer: *@This()) void { + lexer.has_newline_before = lexer.end == 0; - lex: while (true) { - lexer.start = lexer.end; - lexer.token = T.t_end_of_file; + lex: while (true) { + lexer.start = lexer.end; + lexer.token = T.t_end_of_file; - switch (lexer.code_point) { - -1 => { - lexer.token = T.t_end_of_file; - }, + switch (lexer.code_point) { + -1 => { + lexer.token = T.t_end_of_file; + }, - '#' => { - if (lexer.start == 0 and lexer.source.contents[1] == '!') { - lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); - return; - } + '#' => { + if (lexer.start == 0 and lexer.source.contents[1] == '!') { + lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); + return; + } + + lexer.step(); + if (!isIdentifierStart(lexer.code_point)) { + lexer.syntaxError(); + } + lexer.step(); + if (isIdentifierStart(lexer.code_point)) { lexer.step(); - if (!isIdentifierStart(lexer.code_point)) { - lexer.syntaxError(); + while (isIdentifierContinue(lexer.code_point)) { + lexer.step(); } - lexer.step(); - - if (isIdentifierStart(lexer.code_point)) { + if (lexer.code_point == '\\') { + lexer.scanIdentifierWithEscapes(); + lexer.token = T.t_private_identifier; + // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); + } else { + lexer.token = T.t_private_identifier; + lexer.identifier = lexer.raw(); + } + break; + } + }, + '\r', '\n', 0x2028, 0x2029 => { + lexer.step(); + lexer.has_newline_before = true; + continue; + }, + '\t', ' ' => { + lexer.step(); + continue; + }, + '(' => { + lexer.step(); + lexer.token = T.t_open_paren; + }, + ')' => { + lexer.step(); + lexer.token = T.t_close_paren; + }, + '[' => { + lexer.step(); + lexer.token = T.t_open_bracket; + }, + ']' => { + lexer.step(); + lexer.token = T.t_close_bracket; + }, + '{' => { + lexer.step(); + lexer.token = T.t_open_brace; + }, + '}' => { + lexer.step(); + lexer.token = T.t_close_brace; + }, + ',' => { + lexer.step(); + lexer.token = T.t_comma; + }, + ':' => { + lexer.step(); + lexer.token = T.t_colon; + }, + ';' => { + lexer.step(); + lexer.token = T.t_semicolon; + }, + '@' => { + lexer.step(); + lexer.token = T.t_at; + }, + '~' => { + lexer.step(); + lexer.token = T.t_tilde; + }, + '?' => { + // '?' or '?.' or '??' or '??=' + lexer.step(); + switch (lexer.code_point) { + '?' 
=> { lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { - lexer.step(); - } - if (lexer.code_point == '\\') { - lexer.scanIdentifierWithEscapes(); - lexer.token = T.t_private_identifier; - // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); - } else { - lexer.token = T.t_private_identifier; - lexer.identifier = lexer.raw(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_question_question_equals; + }, + else => { + lexer.token = T.t_question_question; + }, } - break; - } - }, - '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); - lexer.has_newline_before = true; - continue; - }, - '\t', ' ' => { - lexer.step(); - continue; - }, - '(' => { - lexer.step(); - lexer.token = T.t_open_paren; - }, - ')' => { - lexer.step(); - lexer.token = T.t_close_paren; - }, - '[' => { - lexer.step(); - lexer.token = T.t_open_bracket; - }, - ']' => { - lexer.step(); - lexer.token = T.t_close_bracket; - }, - '{' => { - lexer.step(); - lexer.token = T.t_open_brace; - }, - '}' => { - lexer.step(); - lexer.token = T.t_close_brace; - }, - ',' => { - lexer.step(); - lexer.token = T.t_comma; - }, - ':' => { - lexer.step(); - lexer.token = T.t_colon; - }, - ';' => { - lexer.step(); - lexer.token = T.t_semicolon; - }, - '@' => { - lexer.step(); - lexer.token = T.t_at; - }, - '~' => { - lexer.step(); - lexer.token = T.t_tilde; - }, - '?' => { - // '?' or '?.' or '??' or '??=' - lexer.step(); - switch (lexer.code_point) { - '?' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_question_question_equals; - }, - else => { - lexer.token = T.t_question_question; - }, - } - }, + }, - '.' => { - lexer.token = T.t_question; - const current = lexer.current; - const contents = lexer.source.contents; + '.' 
=> { + lexer.token = T.t_question; + const current = lexer.current; + const contents = lexer.source.contents; - // Lookahead to disambiguate with 'a?.1:b' - if (current < contents.len) { - const c = contents[current]; - if (c < '0' or c > '9') { - lexer.step(); - lexer.token = T.t_question_dot; - } + // Lookahead to disambiguate with 'a?.1:b' + if (current < contents.len) { + const c = contents[current]; + if (c < '0' or c > '9') { + lexer.step(); + lexer.token = T.t_question_dot; } - }, - else => { - lexer.token = T.t_question; - }, - } - }, - '%' => { - // '%' or '%=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_percent_equals; - }, - - else => { - lexer.token = T.t_percent; - }, - } - }, + } + }, + else => { + lexer.token = T.t_question; + }, + } + }, + '%' => { + // '%' or '%=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_percent_equals; + }, - '&' => { - // '&' or '&=' or '&&' or '&&=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_ampersand_equals; - }, + else => { + lexer.token = T.t_percent; + }, + } + }, - '&' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_ampersand_ampersand_equals; - }, - - else => { - lexer.token = T.t_ampersand_ampersand; - }, - } - }, - else => { - lexer.token = T.t_ampersand; - }, - } - }, - - '|' => { - - // '|' or '|=' or '||' or '||=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_bar_equals; - }, - '|' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_bar_bar_equals; - }, + '&' => { + // '&' or '&=' or '&&' or '&&=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_ampersand_equals; + }, - else => { - lexer.token = T.t_bar_bar; - }, - } - }, - else => { - lexer.token = T.t_bar; - }, - } - }, + '&' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_ampersand_ampersand_equals; + }, - '^' => { - // '^' or '^=' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_caret_equals; - }, + else => { + lexer.token = T.t_ampersand_ampersand; + }, + } + }, + else => { + lexer.token = T.t_ampersand; + }, + } + }, - else => { - lexer.token = T.t_caret; - }, - } - }, + '|' => { - '+' => { - // '+' or '+=' or '++' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_plus_equals; - }, + // '|' or '|=' or '||' or '||=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_bar_equals; + }, + '|' => { + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_bar_bar_equals; + }, - '+' => { - lexer.step(); - lexer.token = T.t_plus_plus; - }, + else => { + lexer.token = T.t_bar_bar; + }, + } + }, + else => { + lexer.token = T.t_bar; + }, + } + }, - else => { - lexer.token = T.t_plus; - }, - } - }, + '^' => { + // '^' or '^=' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_caret_equals; + }, - '-' => { - // '+' or '+=' or '++' - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_minus_equals; - }, + else => { + lexer.token = T.t_caret; + }, + } + }, - '-' => { - lexer.step(); + '+' => { + // '+' or '+=' or '++' + lexer.step(); + switch (lexer.code_point) { + '=' => { + 
lexer.step(); + lexer.token = T.t_plus_equals; + }, - if (lexer.code_point == '>' and lexer.has_newline_before) { - lexer.step(); - lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable; - - singleLineHTMLCloseComment: while (true) { - switch (lexer.code_point) { - '\r', '\n', 0x2028, 0x2029 => { - break :singleLineHTMLCloseComment; - }, - -1 => { - break :singleLineHTMLCloseComment; - }, - else => {}, - } - lexer.step(); - } - continue; - } + '+' => { + lexer.step(); + lexer.token = T.t_plus_plus; + }, - lexer.token = T.t_minus_minus; - }, + else => { + lexer.token = T.t_plus; + }, + } + }, - else => { - lexer.token = T.t_minus; - }, - } - }, + '-' => { + // '+' or '+=' or '++' + lexer.step(); + switch (lexer.code_point) { + '=' => { + lexer.step(); + lexer.token = T.t_minus_equals; + }, - '*' => { - // '*' or '*=' or '**' or '**=' + '-' => { + lexer.step(); - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = .t_asterisk_equals; - }, - '*' => { + if (lexer.code_point == '>' and lexer.has_newline_before) { lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = .t_asterisk_asterisk_equals; - }, - else => { - lexer.token = .t_asterisk_asterisk; - }, - } - }, - else => { - lexer.token = .t_asterisk; - }, - } - }, - '/' => { - // '/' or '/=' or '//' or '/* ... */' - lexer.step(); - // TODO: forGlobalName + lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable; - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = .t_slash_equals; - }, - '/' => { - lexer.step(); - singleLineComment: while (true) { - lexer.step(); + singleLineHTMLCloseComment: while (true) { switch (lexer.code_point) { '\r', '\n', 0x2028, 0x2029 => { - break :singleLineComment; + break :singleLineHTMLCloseComment; }, -1 => { - break :singleLineComment; + break :singleLineHTMLCloseComment; }, else => {}, } + lexer.step(); } - - if (jsonOptions) |json| { - if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); - return; - } - } - lexer.scanCommentText(); - continue; - }, - '*' => { - lexer.step(); - - multiLineComment: while (true) { - switch (lexer.code_point) { - '*' => { - lexer.step(); - if (lexer.code_point == '/') { - lexer.step(); - break :multiLineComment; - } - }, - '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); - lexer.has_newline_before = true; - }, - -1 => { - lexer.start = lexer.end; - lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true); - }, - else => { - lexer.step(); - }, - } - } - if (jsonOptions) |json| { - if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); - return; - } - } - lexer.scanCommentText(); continue; - }, - else => { - lexer.token = .t_slash; - }, - } - }, - - '=' => { - // '=' or '=>' or '==' or '===' - lexer.step(); - switch (lexer.code_point) { - '>' => { - lexer.step(); - lexer.token = T.t_equals_greater_than; - }, + } - '=' => { - lexer.step(); - switch (lexer.code_point) { - '=' => { - lexer.step(); - lexer.token = T.t_equals_equals_equals; - }, + lexer.token = T.t_minus_minus; + }, - else => { - lexer.token = T.t_equals_equals; - }, - } - }, + else => { + lexer.token = T.t_minus; + }, + } + }, - else => { - lexer.token = T.t_equals; - }, - } - }, + '*' => { + // '*' or '*=' or '**' 
or '**=' - '<' => { - // '<' or '<<' or '<=' or '<<=' or '