about | summary | refs | log | tree | commit | diff
path: root/src/js_lexer.zig
diff options
context:
space:
mode:
author Jarred Sumner <jarred@jarredsumner.com> 2021-04-28 21:58:02 -0700
committer Jarred Sumner <jarred@jarredsumner.com> 2021-04-28 21:58:02 -0700
commit cabe773a4f0a12e411f9f3c9698da6bbd90ec474 (patch)
tree c20cfd9ba22c4ca999c850edb04012d94d72ccb5 /src/js_lexer.zig
parent 435a6e9b187168d869024d1002951e4bfa76333a (diff)
download bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.tar.gz
bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.tar.zst
bun-cabe773a4f0a12e411f9f3c9698da6bbd90ec474.zip
wip
Former-commit-id: b37acf309c8f42d49dc47eea446f89a3dbe9f6e2
Diffstat (limited to 'src/js_lexer.zig')
-rw-r--r-- src/js_lexer.zig 2368
1 files changed, 1213 insertions, 1155 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index bd018a254..a6625b3ff 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -8,8 +8,6 @@ const js_ast = @import("js_ast.zig");
usingnamespace @import("ast/base.zig");
usingnamespace @import("strings.zig");
-const _f = @import("./test/fixtures.zig");
-
const unicode = std.unicode;
const Source = logger.Source;
@@ -21,1264 +19,1365 @@ pub const jsxEntity = tables.jsxEntity;
pub const StrictModeReservedWords = tables.StrictModeReservedWords;
pub const PropertyModifierKeyword = tables.PropertyModifierKeyword;
pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword;
+pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier;
pub const JSONOptions = struct {
allow_comments: bool = false,
allow_trailing_commas: bool = false,
};
-pub fn NewLexerType(comptime jsonOptions: ?JSONOptions) type {
- return struct {
- // pub const Error = error{
- // UnexpectedToken,
- // EndOfFile,
- // };
-
- // err: ?@This().Error,
- log: *logger.Log,
- source: logger.Source,
- current: usize = 0,
- start: usize = 0,
- end: usize = 0,
- did_panic: bool = false,
- approximate_newline_count: i32 = 0,
- legacy_octal_loc: logger.Loc = logger.Loc.Empty,
- previous_backslash_quote_in_jsx: logger.Range = logger.Range.None,
- token: T = T.t_end_of_file,
- has_newline_before: bool = false,
- has_pure_comment_before: bool = false,
- preserve_all_comments_before: bool = false,
- is_legacy_octal_literal: bool = false,
- comments_to_preserve_before: std.ArrayList(js_ast.G.Comment),
- all_original_comments: ?[]js_ast.G.Comment = null,
- code_point: CodePoint = -1,
- string_literal: JavascriptString,
- identifier: []const u8 = "",
- jsx_factory_pragma_comment: ?js_ast.Span = null,
- jsx_fragment_pragma_comment: ?js_ast.Span = null,
- source_mapping_url: ?js_ast.Span = null,
- number: f64 = 0.0,
- rescan_close_brace_as_template_token: bool = false,
- for_global_name: bool = false,
- prev_error_loc: logger.Loc = logger.Loc.Empty,
- allocator: *std.mem.Allocator,
-
- pub fn loc(self: *@This()) logger.Loc {
- return logger.usize2Loc(self.start);
+pub const Lexer = struct {
+ // pub const Error = error{
+ // UnexpectedToken,
+ // EndOfFile,
+ // };
+
+ // err: ?@This().Error,
+ log: *logger.Log,
+ json_options: ?JSONOptions = null,
+ for_global_name: bool = false,
+ source: logger.Source,
+ current: usize = 0,
+ start: usize = 0,
+ end: usize = 0,
+ did_panic: bool = false,
+ approximate_newline_count: i32 = 0,
+ legacy_octal_loc: logger.Loc = logger.Loc.Empty,
+ previous_backslash_quote_in_jsx: logger.Range = logger.Range.None,
+ token: T = T.t_end_of_file,
+ has_newline_before: bool = false,
+ has_pure_comment_before: bool = false,
+ preserve_all_comments_before: bool = false,
+ is_legacy_octal_literal: bool = false,
+ comments_to_preserve_before: std.ArrayList(js_ast.G.Comment),
+ all_original_comments: ?[]js_ast.G.Comment = null,
+ code_point: CodePoint = -1,
+ string_literal: JavascriptString,
+ identifier: []const u8 = "",
+ jsx_factory_pragma_comment: ?js_ast.Span = null,
+ jsx_fragment_pragma_comment: ?js_ast.Span = null,
+ source_mapping_url: ?js_ast.Span = null,
+ number: f64 = 0.0,
+ rescan_close_brace_as_template_token: bool = false,
+ prev_error_loc: logger.Loc = logger.Loc.Empty,
+ allocator: *std.mem.Allocator,
+
+ pub fn loc(self: *@This()) logger.Loc {
+ return logger.usize2Loc(self.start);
+ }
+
+ fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 {
+ if (it.current >= it.source.contents.len) {
+ return null;
}
- fn nextCodepointSlice(it: *@This()) callconv(.Inline) ?[]const u8 {
- if (it.current >= it.source.contents.len) {
- return null;
- }
+ const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable;
+ it.end = it.current;
+ it.current += cp_len;
- const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable;
- it.end = it.current;
- it.current += cp_len;
+ return it.source.contents[it.current - cp_len .. it.current];
+ }
- return it.source.contents[it.current - cp_len .. it.current];
- }
+ pub fn syntaxError(self: *@This()) void {
+ self.addError(self.start, "Syntax Error!!", .{}, true);
+ }
- pub fn syntaxError(self: *@This()) void {
- self.addError(self.start, "Syntax Error!!", .{}, true);
- }
+ pub fn addDefaultError(self: *@This(), msg: []const u8) void {
+ self.addError(self.start, "{s}", .{msg}, true);
+ }
- pub fn addDefaultError(self: *@This(), msg: []const u8) void {
- self.addError(self.start, "{s}", .{msg}, true);
+ pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
+ var __loc = logger.usize2Loc(_loc);
+ if (__loc.eql(self.prev_error_loc)) {
+ return;
}
- pub fn addError(self: *@This(), _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
- var __loc = logger.usize2Loc(_loc);
- if (__loc.eql(self.prev_error_loc)) {
- return;
- }
+ self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable;
+ self.prev_error_loc = __loc;
+ var msg = self.log.msgs.items[self.log.msgs.items.len - 1];
+ msg.formatNoWriter(std.debug.panic);
+ }
- self.log.addErrorFmt(self.source, __loc, self.allocator, format, args) catch unreachable;
- self.prev_error_loc = __loc;
- var msg = self.log.msgs.items[self.log.msgs.items.len - 1];
- msg.formatNoWriter(std.debug.panic);
+ pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void {
+ if (self.prev_error_loc.eql(r.loc)) {
+ return;
}
- pub fn addRangeError(self: *@This(), r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void {
- if (self.prev_error_loc.eql(r.loc)) {
- return;
- }
+ const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable;
+ var msg = self.log.addRangeError(self.source, r, errorMessage);
+ self.prev_error_loc = r.loc;
- const errorMessage = std.fmt.allocPrint(self.allocator, format, args) catch unreachable;
- var msg = self.log.addRangeError(self.source, r, errorMessage);
- self.prev_error_loc = r.loc;
+ if (panic) {
+ var fixedBuffer = [_]u8{0} ** 8096;
+ var stream = std.io.fixedBufferStream(&fixedBuffer);
+ const writer = stream.writer();
+ self.log.print(writer) catch unreachable;
- if (panic) {
- self.doPanic(errorMessage);
- }
+ std.debug.panic("{s}", .{fixedBuffer[0..stream.pos]});
}
+ }
- fn doPanic(self: *@This(), content: []const u8) void {
- if (@import("builtin").is_test) {
- self.did_panic = true;
- } else {
- std.debug.panic("{s}", .{content});
- }
+ fn doPanic(self: *@This(), content: []const u8) void {
+ if (@import("builtin").is_test) {
+ self.did_panic = true;
+ } else {
+ std.debug.panic("{s}", .{content});
}
+ }
- pub fn codePointEql(self: *@This(), a: u8) bool {
- return @intCast(CodePoint, a) == self.code_point;
- }
+ pub fn codePointEql(self: *@This(), a: u8) bool {
+ return @intCast(CodePoint, a) == self.code_point;
+ }
- fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint {
- const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1);
+ fn nextCodepoint(it: *@This()) callconv(.Inline) CodePoint {
+ const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1);
- switch (slice.len) {
- 1 => return @as(CodePoint, slice[0]),
- 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
- 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
- 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
- else => unreachable,
- }
+ switch (slice.len) {
+ 1 => return @as(CodePoint, slice[0]),
+ 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
+ 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
+ 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
+ else => unreachable,
}
+ }
- /// Look ahead at the next n codepoints without advancing the iterator.
- /// If fewer than n codepoints are available, then return the remainder of the string.
- fn peek(it: *@This(), n: usize) []const u8 {
- const original_i = it.current;
- defer it.current = original_i;
-
- var end_ix = original_i;
- var found: usize = 0;
- while (found < n) : (found += 1) {
- const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..];
- end_ix += next_codepoint.len;
- }
-
- return it.source.contents[original_i..end_ix];
+ /// Look ahead at the next n codepoints without advancing the iterator.
+ /// If fewer than n codepoints are available, then return the remainder of the string.
+ fn peek(it: *@This(), n: usize) []const u8 {
+ const original_i = it.current;
+ defer it.current = original_i;
+
+ var end_ix = original_i;
+ var found: usize = 0;
+ while (found < n) : (found += 1) {
+ const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..];
+ end_ix += next_codepoint.len;
}
- pub fn isIdentifierOrKeyword(lexer: @This()) bool {
- return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier);
- }
+ return it.source.contents[original_i..end_ix];
+ }
- fn parseStringLiteral(lexer: *@This()) void {
- var quote: CodePoint = lexer.code_point;
- var needs_slow_path = false;
- var suffixLen: usize = 1;
+ pub fn isIdentifierOrKeyword(lexer: @This()) bool {
+ return @enumToInt(lexer.token) >= @enumToInt(T.t_identifier);
+ }
- if (quote != '`') {
- lexer.token = T.t_string_literal;
- } else if (lexer.rescan_close_brace_as_template_token) {
- lexer.token = T.t_template_tail;
- } else {
- lexer.token = T.t_no_substitution_template_literal;
- }
- lexer.step();
+ fn parseStringLiteral(lexer: *@This()) void {
+ var quote: CodePoint = lexer.code_point;
+ var needs_slow_path = false;
+ var suffixLen: usize = 1;
- stringLiteral: while (true) {
- switch (lexer.code_point) {
- '\\' => {
- needs_slow_path = true;
- lexer.step();
+ if (quote != '`') {
+ lexer.token = T.t_string_literal;
+ } else if (lexer.rescan_close_brace_as_template_token) {
+ lexer.token = T.t_template_tail;
+ } else {
+ lexer.token = T.t_no_substitution_template_literal;
+ }
+ lexer.step();
- // Handle Windows CRLF
- if (lexer.code_point == '\r' and jsonOptions != null) {
+ stringLiteral: while (true) {
+ switch (lexer.code_point) {
+ '\\' => {
+ needs_slow_path = true;
+ lexer.step();
+
+ // Handle Windows CRLF
+ if (lexer.code_point == '\r' and lexer.json_options != null) {
+ lexer.step();
+ if (lexer.code_point == '\n') {
lexer.step();
- if (lexer.code_point == '\n') {
- lexer.step();
- }
- continue :stringLiteral;
}
- },
- // This indicates the end of the file
+ continue :stringLiteral;
+ }
+ },
+ // This indicates the end of the file
- -1 => {
- lexer.addDefaultError("Unterminated string literal");
- },
+ -1 => {
+ lexer.addDefaultError("Unterminated string literal");
+ },
- '\r' => {
- if (quote != '`') {
- lexer.addDefaultError("Unterminated string literal");
- }
+ '\r' => {
+ if (quote != '`') {
+ lexer.addDefaultError("Unterminated string literal");
+ }
- // Template literals require newline normalization
- needs_slow_path = true;
- },
+ // Template literals require newline normalization
+ needs_slow_path = true;
+ },
- '\n' => {
- if (quote != '`') {
- lexer.addDefaultError("Unterminated string literal");
- }
- },
+ '\n' => {
+ if (quote != '`') {
+ lexer.addDefaultError("Unterminated string literal");
+ }
+ },
- '$' => {
- if (quote == '`') {
+ '$' => {
+ if (quote == '`') {
+ lexer.step();
+ if (lexer.code_point == '{') {
+ suffixLen = 2;
lexer.step();
- if (lexer.code_point == '{') {
- suffixLen = 2;
- lexer.step();
- if (lexer.rescan_close_brace_as_template_token) {
- lexer.token = T.t_template_middle;
- } else {
- lexer.token = T.t_template_head;
- }
- break :stringLiteral;
+ if (lexer.rescan_close_brace_as_template_token) {
+ lexer.token = T.t_template_middle;
+ } else {
+ lexer.token = T.t_template_head;
}
- continue :stringLiteral;
- }
- },
-
- else => {
- if (quote == lexer.code_point) {
- lexer.step();
break :stringLiteral;
}
- // Non-ASCII strings need the slow path
- if (lexer.code_point >= 0x80) {
- needs_slow_path = true;
- } else if (jsonOptions != null and lexer.code_point < 0x20) {
- lexer.syntaxError();
- }
- },
- }
- lexer.step();
- }
+ continue :stringLiteral;
+ }
+ },
- const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen];
- if (needs_slow_path) {
- lexer.string_literal = lexer.stringToUTF16(text);
- } else {
- lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable;
- var i: usize = 0;
- for (text) |byte| {
- lexer.string_literal[i] = byte;
- i += 1;
- }
+ else => {
+ if (quote == lexer.code_point) {
+ lexer.step();
+ break :stringLiteral;
+ }
+ // Non-ASCII strings need the slow path
+ if (lexer.code_point >= 0x80) {
+ needs_slow_path = true;
+ } else if (lexer.json_options != null and lexer.code_point < 0x20) {
+ lexer.syntaxError();
+ }
+ },
}
+ lexer.step();
+ }
- if (quote == '\'' and jsonOptions != null) {
- lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true);
+ const text = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen];
+ if (needs_slow_path) {
+ lexer.string_literal = lexer.stringToUTF16(text);
+ } else {
+ lexer.string_literal = lexer.allocator.alloc(u16, text.len) catch unreachable;
+ var i: usize = 0;
+ for (text) |byte| {
+ lexer.string_literal[i] = byte;
+ i += 1;
}
- // for (text)
- // // if (needs_slow_path) {
- // // // Slow path
-
- // // // lexer.string_literal = lexer.(lexer.start + 1, text);
- // // } else {
- // // // Fast path
-
- // // }
}
- fn step(lexer: *@This()) void {
- lexer.code_point = lexer.nextCodepoint();
-
- // Track the approximate number of newlines in the file so we can preallocate
- // the line offset table in the printer for source maps. The line offset table
- // is the #1 highest allocation in the heap profile, so this is worth doing.
- // This count is approximate because it handles "\n" and "\r\n" (the common
- // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless
- // because it's only a preallocation. The array will just grow if it's too small.
- if (lexer.code_point == '\n') {
- lexer.approximate_newline_count += 1;
- }
+ if (quote == '\'' and lexer.json_options != null) {
+ lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true);
}
+ // for (text)
+ // // if (needs_slow_path) {
+ // // // Slow path
- pub fn expect(self: *@This(), comptime token: T) void {
- if (self.token != token) {
- self.expected(token);
- }
+ // // // lexer.string_literal = lexer.(lexer.start + 1, text);
+ // // } else {
+ // // // Fast path
- self.next();
- }
+ // // }
+ }
- pub fn expectOrInsertSemicolon(lexer: *@This()) void {
- if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and
- lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file))
- {
- lexer.expect(T.t_semicolon);
- }
+ fn step(lexer: *@This()) void {
+ lexer.code_point = lexer.nextCodepoint();
+
+ // Track the approximate number of newlines in the file so we can preallocate
+ // the line offset table in the printer for source maps. The line offset table
+ // is the #1 highest allocation in the heap profile, so this is worth doing.
+ // This count is approximate because it handles "\n" and "\r\n" (the common
+ // cases) but not "\r" or "\u2028" or "\u2029". Getting this wrong is harmless
+ // because it's only a preallocation. The array will just grow if it's too small.
+ if (lexer.code_point == '\n') {
+ lexer.approximate_newline_count += 1;
}
+ }
- pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void {
- self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true);
+ pub fn expect(self: *@This(), comptime token: T) void {
+ if (self.token != token) {
+ self.expected(token);
}
- pub fn scanIdentifierWithEscapes(self: *@This()) void {
- self.addUnsupportedSyntaxError("escape sequence");
- return;
+ self.next();
+ }
+
+ pub fn expectOrInsertSemicolon(lexer: *@This()) void {
+ if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and
+ lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file))
+ {
+ lexer.expect(T.t_semicolon);
}
+ }
+
+ pub fn addUnsupportedSyntaxError(self: *@This(), msg: []const u8) void {
+ self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true);
+ }
+
+ pub fn scanIdentifierWithEscapes(self: *@This()) void {
+ self.addUnsupportedSyntaxError("escape sequence");
+ return;
+ }
- pub fn debugInfo(self: *@This()) void {
- if (self.log.errors > 0) {
- const stderr = std.io.getStdErr().writer();
- self.log.print(stderr) catch unreachable;
+ pub fn debugInfo(self: *@This()) void {
+ if (self.log.errors > 0) {
+ const stderr = std.io.getStdErr().writer();
+ self.log.print(stderr) catch unreachable;
+ } else {
+ if (self.token == T.t_identifier or self.token == T.t_string_literal) {
+ std.debug.print(" {s} ", .{self.raw()});
} else {
- if (self.token == T.t_identifier or self.token == T.t_string_literal) {
- std.debug.print(" {s} ", .{self.raw()});
- } else {
- std.debug.print(" <{s}> ", .{tokenToString.get(self.token)});
- }
+ std.debug.print(" <{s}> ", .{tokenToString.get(self.token)});
}
}
+ }
- pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void {
- if (!self.isContextualKeyword(keyword)) {
- self.addError(self.start, "\"{s}\"", .{keyword}, true);
- }
- self.next();
+ pub fn expectContextualKeyword(self: *@This(), comptime keyword: string) void {
+ if (!self.isContextualKeyword(keyword)) {
+ self.addError(self.start, "\"{s}\"", .{keyword}, true);
}
+ self.next();
+ }
- pub fn next(lexer: *@This()) void {
- lexer.has_newline_before = lexer.end == 0;
+ pub fn next(lexer: *@This()) void {
+ lexer.has_newline_before = lexer.end == 0;
- lex: while (true) {
- lexer.start = lexer.end;
- lexer.token = T.t_end_of_file;
+ lex: while (true) {
+ lexer.start = lexer.end;
+ lexer.token = T.t_end_of_file;
- switch (lexer.code_point) {
- -1 => {
- lexer.token = T.t_end_of_file;
- },
+ switch (lexer.code_point) {
+ -1 => {
+ lexer.token = T.t_end_of_file;
+ },
- '#' => {
- if (lexer.start == 0 and lexer.source.contents[1] == '!') {
- lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet.");
- return;
- }
+ '#' => {
+ if (lexer.start == 0 and lexer.source.contents[1] == '!') {
+ lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet.");
+ return;
+ }
- lexer.step();
- if (!isIdentifierStart(lexer.code_point)) {
- lexer.syntaxError();
- }
- lexer.step();
+ lexer.step();
+ if (!isIdentifierStart(lexer.code_point)) {
+ lexer.syntaxError();
+ }
+ lexer.step();
- if (isIdentifierStart(lexer.code_point)) {
- lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
- lexer.step();
- }
- if (lexer.code_point == '\\') {
- lexer.scanIdentifierWithEscapes();
- lexer.token = T.t_private_identifier;
- // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
- } else {
- lexer.token = T.t_private_identifier;
- lexer.identifier = lexer.raw();
- }
- break;
- }
- },
- '\r', '\n', 0x2028, 0x2029 => {
- lexer.step();
- lexer.has_newline_before = true;
- continue;
- },
- '\t', ' ' => {
- lexer.step();
- continue;
- },
- '(' => {
+ if (isIdentifierStart(lexer.code_point)) {
lexer.step();
- lexer.token = T.t_open_paren;
- },
- ')' => {
- lexer.step();
- lexer.token = T.t_close_paren;
- },
- '[' => {
- lexer.step();
- lexer.token = T.t_open_bracket;
- },
- ']' => {
- lexer.step();
- lexer.token = T.t_close_bracket;
- },
- '{' => {
- lexer.step();
- lexer.token = T.t_open_brace;
- },
- '}' => {
- lexer.step();
- lexer.token = T.t_close_brace;
- },
- ',' => {
- lexer.step();
- lexer.token = T.t_comma;
- },
- ':' => {
- lexer.step();
- lexer.token = T.t_colon;
- },
- ';' => {
- lexer.step();
- lexer.token = T.t_semicolon;
- },
- '@' => {
- lexer.step();
- lexer.token = T.t_at;
- },
- '~' => {
- lexer.step();
- lexer.token = T.t_tilde;
- },
- '?' => {
- // '?' or '?.' or '??' or '??='
- lexer.step();
- switch (lexer.code_point) {
- '?' => {
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_question_question_equals;
- },
- else => {
- lexer.token = T.t_question_question;
- },
- }
- },
-
- '.' => {
- lexer.token = T.t_question;
- const current = lexer.current;
- const contents = lexer.source.contents;
-
- // Lookahead to disambiguate with 'a?.1:b'
- if (current < contents.len) {
- const c = contents[current];
- if (c < '0' or c > '9') {
- lexer.step();
- lexer.token = T.t_question_dot;
- }
- }
- },
- else => {
- lexer.token = T.t_question;
- },
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
}
- },
- '%' => {
- // '%' or '%='
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_percent_equals;
- },
-
- else => {
- lexer.token = T.t_percent;
- },
+ if (lexer.code_point == '\\') {
+ lexer.scanIdentifierWithEscapes();
+ lexer.token = T.t_private_identifier;
+ // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
+ } else {
+ lexer.token = T.t_private_identifier;
+ lexer.identifier = lexer.raw();
}
- },
+ break;
+ }
+ },
+ '\r', '\n', 0x2028, 0x2029 => {
+ lexer.step();
+ lexer.has_newline_before = true;
+ continue;
+ },
+ '\t', ' ' => {
+ lexer.step();
+ continue;
+ },
+ '(' => {
+ lexer.step();
+ lexer.token = T.t_open_paren;
+ },
+ ')' => {
+ lexer.step();
+ lexer.token = T.t_close_paren;
+ },
+ '[' => {
+ lexer.step();
+ lexer.token = T.t_open_bracket;
+ },
+ ']' => {
+ lexer.step();
+ lexer.token = T.t_close_bracket;
+ },
+ '{' => {
+ lexer.step();
+ lexer.token = T.t_open_brace;
+ },
+ '}' => {
+ lexer.step();
+ lexer.token = T.t_close_brace;
+ },
+ ',' => {
+ lexer.step();
+ lexer.token = T.t_comma;
+ },
+ ':' => {
+ lexer.step();
+ lexer.token = T.t_colon;
+ },
+ ';' => {
+ lexer.step();
+ lexer.token = T.t_semicolon;
+ },
+ '@' => {
+ lexer.step();
+ lexer.token = T.t_at;
+ },
+ '~' => {
+ lexer.step();
+ lexer.token = T.t_tilde;
+ },
+ '?' => {
+ // '?' or '?.' or '??' or '??='
+ lexer.step();
+ switch (lexer.code_point) {
+ '?' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_question_question_equals;
+ },
+ else => {
+ lexer.token = T.t_question_question;
+ },
+ }
+ },
- '&' => {
- // '&' or '&=' or '&&' or '&&='
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_ampersand_equals;
- },
+ '.' => {
+ lexer.token = T.t_question;
+ const current = lexer.current;
+ const contents = lexer.source.contents;
- '&' => {
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_ampersand_ampersand_equals;
- },
-
- else => {
- lexer.token = T.t_ampersand_ampersand;
- },
+ // Lookahead to disambiguate with 'a?.1:b'
+ if (current < contents.len) {
+ const c = contents[current];
+ if (c < '0' or c > '9') {
+ lexer.step();
+ lexer.token = T.t_question_dot;
}
- },
- else => {
- lexer.token = T.t_ampersand;
- },
- }
- },
+ }
+ },
+ else => {
+ lexer.token = T.t_question;
+ },
+ }
+ },
+ '%' => {
+ // '%' or '%='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_percent_equals;
+ },
- '|' => {
+ else => {
+ lexer.token = T.t_percent;
+ },
+ }
+ },
- // '|' or '|=' or '||' or '||='
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_bar_equals;
- },
- '|' => {
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_bar_bar_equals;
- },
+ '&' => {
+ // '&' or '&=' or '&&' or '&&='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_ampersand_equals;
+ },
- else => {
- lexer.token = T.t_bar_bar;
- },
- }
- },
- else => {
- lexer.token = T.t_bar;
- },
- }
- },
+ '&' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_ampersand_ampersand_equals;
+ },
- '^' => {
- // '^' or '^='
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_caret_equals;
- },
+ else => {
+ lexer.token = T.t_ampersand_ampersand;
+ },
+ }
+ },
+ else => {
+ lexer.token = T.t_ampersand;
+ },
+ }
+ },
- else => {
- lexer.token = T.t_caret;
- },
- }
- },
+ '|' => {
- '+' => {
- // '+' or '+=' or '++'
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_plus_equals;
- },
+ // '|' or '|=' or '||' or '||='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_bar_equals;
+ },
+ '|' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_bar_bar_equals;
+ },
- '+' => {
- lexer.step();
- lexer.token = T.t_plus_plus;
- },
+ else => {
+ lexer.token = T.t_bar_bar;
+ },
+ }
+ },
+ else => {
+ lexer.token = T.t_bar;
+ },
+ }
+ },
- else => {
- lexer.token = T.t_plus;
- },
- }
- },
+ '^' => {
+ // '^' or '^='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_caret_equals;
+ },
- '-' => {
- // '+' or '+=' or '++'
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_minus_equals;
- },
+ else => {
+ lexer.token = T.t_caret;
+ },
+ }
+ },
- '-' => {
- lexer.step();
+ '+' => {
+ // '+' or '+=' or '++'
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_plus_equals;
+ },
- if (lexer.code_point == '>' and lexer.has_newline_before) {
- lexer.step();
- lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable;
-
- singleLineHTMLCloseComment: while (true) {
- switch (lexer.code_point) {
- '\r', '\n', 0x2028, 0x2029 => {
- break :singleLineHTMLCloseComment;
- },
- -1 => {
- break :singleLineHTMLCloseComment;
- },
- else => {},
- }
- lexer.step();
- }
- continue;
- }
+ '+' => {
+ lexer.step();
+ lexer.token = T.t_plus_plus;
+ },
- lexer.token = T.t_minus_minus;
- },
+ else => {
+ lexer.token = T.t_plus;
+ },
+ }
+ },
- else => {
- lexer.token = T.t_minus;
- },
- }
- },
+ '-' => {
+ // '-' or '-=' or '--' or '-->'
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_minus_equals;
+ },
- '*' => {
- // '*' or '*=' or '**' or '**='
+ '-' => {
+ lexer.step();
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = .t_asterisk_equals;
- },
- '*' => {
+ if (lexer.code_point == '>' and lexer.has_newline_before) {
lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = .t_asterisk_asterisk_equals;
- },
- else => {
- lexer.token = .t_asterisk_asterisk;
- },
- }
- },
- else => {
- lexer.token = .t_asterisk;
- },
- }
- },
- '/' => {
- // '/' or '/=' or '//' or '/* ... */'
- lexer.step();
- // TODO: forGlobalName
+ lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable;
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = .t_slash_equals;
- },
- '/' => {
- lexer.step();
- singleLineComment: while (true) {
- lexer.step();
+ singleLineHTMLCloseComment: while (true) {
switch (lexer.code_point) {
'\r', '\n', 0x2028, 0x2029 => {
- break :singleLineComment;
+ break :singleLineHTMLCloseComment;
},
-1 => {
- break :singleLineComment;
+ break :singleLineHTMLCloseComment;
},
else => {},
}
+ lexer.step();
}
-
- if (jsonOptions) |json| {
- if (!json.allow_comments) {
- lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
- return;
- }
- }
- lexer.scanCommentText();
- continue;
- },
- '*' => {
- lexer.step();
-
- multiLineComment: while (true) {
- switch (lexer.code_point) {
- '*' => {
- lexer.step();
- if (lexer.code_point == '/') {
- lexer.step();
- break :multiLineComment;
- }
- },
- '\r', '\n', 0x2028, 0x2029 => {
- lexer.step();
- lexer.has_newline_before = true;
- },
- -1 => {
- lexer.start = lexer.end;
- lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true);
- },
- else => {
- lexer.step();
- },
- }
- }
- if (jsonOptions) |json| {
- if (!json.allow_comments) {
- lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
- return;
- }
- }
- lexer.scanCommentText();
continue;
- },
- else => {
- lexer.token = .t_slash;
- },
- }
- },
-
- '=' => {
- // '=' or '=>' or '==' or '==='
- lexer.step();
- switch (lexer.code_point) {
- '>' => {
- lexer.step();
- lexer.token = T.t_equals_greater_than;
- },
+ }
- '=' => {
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_equals_equals_equals;
- },
+ lexer.token = T.t_minus_minus;
+ },
- else => {
- lexer.token = T.t_equals_equals;
- },
- }
- },
+ else => {
+ lexer.token = T.t_minus;
+ },
+ }
+ },
- else => {
- lexer.token = T.t_equals;
- },
- }
- },
+ '*' => {
+ // '*' or '*=' or '**' or '**='
- '<' => {
- // '<' or '<<' or '<=' or '<<=' or '<!--'
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_less_than_equals;
- },
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = .t_asterisk_equals;
+ },
+ '*' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = .t_asterisk_asterisk_equals;
+ },
+ else => {
+ lexer.token = .t_asterisk_asterisk;
+ },
+ }
+ },
+ else => {
+ lexer.token = .t_asterisk;
+ },
+ }
+ },
+ '/' => {
+ // '/' or '/=' or '//' or '/* ... */'
+ lexer.step();
- '<' => {
+ if (lexer.for_global_name) {
+ lexer.token = .t_slash;
+ break;
+ }
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = .t_slash_equals;
+ },
+ '/' => {
+ lexer.step();
+ singleLineComment: while (true) {
lexer.step();
switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_less_than_less_than_equals;
+ '\r', '\n', 0x2028, 0x2029 => {
+ break :singleLineComment;
},
-
- else => {
- lexer.token = T.t_less_than_less_than;
+ -1 => {
+ break :singleLineComment;
},
+ else => {},
}
- },
- // Handle legacy HTML-style comments
- '!' => {
- if (std.mem.eql(u8, lexer.peek("--".len), "--")) {
- lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!");
+ }
+
+ if (lexer.json_options) |json| {
+ if (!json.allow_comments) {
+ lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
return;
}
+ }
+ lexer.scanCommentText();
+ continue;
+ },
+ '*' => {
+ lexer.step();
- lexer.token = T.t_less_than;
- },
-
- else => {
- lexer.token = T.t_less_than;
- },
- }
- },
-
- '>' => {
- // '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>='
- lexer.step();
-
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_greater_than_equals;
- },
- '>' => {
- lexer.step();
+ multiLineComment: while (true) {
switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_greater_than_greater_than_equals;
- },
- '>' => {
+ '*' => {
lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- lexer.token = T.t_greater_than_greater_than_greater_than_equals;
- },
- else => {
- lexer.token = T.t_greater_than_greater_than_greater_than;
- },
+ if (lexer.code_point == '/') {
+ lexer.step();
+ break :multiLineComment;
}
},
- else => {
- lexer.token = T.t_greater_than_greater_than;
- },
- }
- },
- else => {
- lexer.token = T.t_greater_than;
- },
- }
- },
-
- '!' => {
- // '!' or '!=' or '!=='
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
- lexer.step();
- switch (lexer.code_point) {
- '=' => {
+ '\r', '\n', 0x2028, 0x2029 => {
lexer.step();
- lexer.token = T.t_exclamation_equals_equals;
+ lexer.has_newline_before = true;
+ },
+ -1 => {
+ lexer.start = lexer.end;
+ lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true);
},
-
else => {
- lexer.token = T.t_exclamation_equals;
+ lexer.step();
},
}
- },
- else => {
- lexer.token = T.t_exclamation;
- },
- }
- },
+ }
+ if (lexer.json_options) |json| {
+ if (!json.allow_comments) {
+ lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true);
+ return;
+ }
+ }
+ lexer.scanCommentText();
+ continue;
+ },
+ else => {
+ lexer.token = .t_slash;
+ },
+ }
+ },
- '\'', '"', '`' => {
- lexer.parseStringLiteral();
- },
+ '=' => {
+ // '=' or '=>' or '==' or '==='
+ lexer.step();
+ switch (lexer.code_point) {
+ '>' => {
+ lexer.step();
+ lexer.token = T.t_equals_greater_than;
+ },
- '_', '$', 'a'...'z', 'A'...'Z' => {
- lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
+ '=' => {
lexer.step();
- }
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_equals_equals_equals;
+ },
- if (lexer.code_point == '\\') {
- lexer.scanIdentifierWithEscapes();
- } else {
- const contents = lexer.raw();
- lexer.identifier = contents;
- lexer.token = Keywords.get(contents) orelse T.t_identifier;
- }
- },
+ else => {
+ lexer.token = T.t_equals_equals;
+ },
+ }
+ },
- '\\' => {
- // TODO: normal
- lexer.scanIdentifierWithEscapes();
- },
+ else => {
+ lexer.token = T.t_equals;
+ },
+ }
+ },
- '.', '0'...'9' => {
- lexer.parseNumericLiteralOrDot();
- },
+ '<' => {
+ // '<' or '<<' or '<=' or '<<=' or '<!--'
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_less_than_equals;
+ },
- else => {
- // Check for unusual whitespace characters
- if (isWhitespace(lexer.code_point)) {
+ '<' => {
lexer.step();
- continue;
- }
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_less_than_less_than_equals;
+ },
+
+ else => {
+ lexer.token = T.t_less_than_less_than;
+ },
+ }
+ },
+ // Handle legacy HTML-style comments
+ '!' => {
+ if (std.mem.eql(u8, lexer.peek("--".len), "--")) {
+ lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!");
+ return;
+ }
- if (isIdentifierStart(lexer.code_point)) {
+ lexer.token = T.t_less_than;
+ },
+
+ else => {
+ lexer.token = T.t_less_than;
+ },
+ }
+ },
+
+ '>' => {
+ // '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>='
+ lexer.step();
+
+ switch (lexer.code_point) {
+ '=' => {
lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
- lexer.step();
+ lexer.token = T.t_greater_than_equals;
+ },
+ '>' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_greater_than_greater_than_equals;
+ },
+ '>' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_greater_than_greater_than_greater_than_equals;
+ },
+ else => {
+ lexer.token = T.t_greater_than_greater_than_greater_than;
+ },
+ }
+ },
+ else => {
+ lexer.token = T.t_greater_than_greater_than;
+ },
}
- if (lexer.code_point == '\\') {
+ },
+ else => {
+ lexer.token = T.t_greater_than;
+ },
+ }
+ },
- // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
- } else {
- lexer.token = T.t_identifier;
- lexer.identifier = lexer.raw();
+ '!' => {
+ // '!' or '!=' or '!=='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_exclamation_equals_equals;
+ },
+
+ else => {
+ lexer.token = T.t_exclamation_equals;
+ },
}
- break;
- }
+ },
+ else => {
+ lexer.token = T.t_exclamation;
+ },
+ }
+ },
- lexer.end = lexer.current;
- lexer.token = T.t_syntax_error;
- },
- }
+ '\'', '"', '`' => {
+ lexer.parseStringLiteral();
+ },
- return;
- }
- }
+ '_', '$', 'a'...'z', 'A'...'Z' => {
+ lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
+ }
- pub fn expected(self: *@This(), token: T) void {
- if (tokenToString.get(token).len > 0) {
- self.expectedString(tokenToString.get(token));
- } else {
- self.unexpected();
- }
- }
+ if (lexer.code_point == '\\') {
+ lexer.scanIdentifierWithEscapes();
+ } else {
+ const contents = lexer.raw();
+ lexer.identifier = contents;
+ lexer.token = Keywords.get(contents) orelse T.t_identifier;
+ }
+ },
- pub fn unexpected(lexer: *@This()) void {
- var found: string = undefined;
- if (lexer.start == lexer.source.contents.len) {
- found = "end of file";
- } else {
- found = lexer.raw();
+ '\\' => {
+ // TODO: normal
+ lexer.scanIdentifierWithEscapes();
+ },
+
+ '.', '0'...'9' => {
+ lexer.parseNumericLiteralOrDot();
+ },
+
+ else => {
+ // Check for unusual whitespace characters
+ if (isWhitespace(lexer.code_point)) {
+ lexer.step();
+ continue;
+ }
+
+ if (isIdentifierStart(lexer.code_point)) {
+ lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
+ }
+ if (lexer.code_point == '\\') {
+
+ // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
+ } else {
+ lexer.token = T.t_identifier;
+ lexer.identifier = lexer.raw();
+ }
+ break;
+ }
+
+ lexer.end = lexer.current;
+ lexer.token = T.t_syntax_error;
+ },
}
- lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true);
+ return;
}
+ }
- pub fn raw(self: *@This()) []const u8 {
- return self.source.contents[self.start..self.end];
+ pub fn expected(self: *@This(), token: T) void {
+ if (tokenToString.get(token).len > 0) {
+ self.expectedString(tokenToString.get(token));
+ } else {
+ self.unexpected();
}
+ }
- pub fn isContextualKeyword(self: *@This(), comptime keyword: string) bool {
- return self.token == .t_identifier and strings.eql(self.raw(), keyword);
+ pub fn unexpected(lexer: *@This()) void {
+ var found: string = undefined;
+ if (lexer.start == lexer.source.contents.len) {
+ found = "end of file";
+ } else {
+ found = lexer.raw();
}
- pub fn expectedString(self: *@This(), text: string) void {
- var found = text;
- if (self.source.contents.len == self.start) {
- found = "end of file";
- }
- self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
+ lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true);
+ }
+
+ pub fn raw(self: *@This()) []const u8 {
+ return self.source.contents[self.start..self.end];
+ }
+
+ pub fn isContextualKeyword(self: *@This(), comptime keyword: string) bool {
+ return self.token == .t_identifier and strings.eql(self.raw(), keyword);
+ }
+
+ pub fn expectedString(self: *@This(), text: string) void {
+ var found = self.raw();
+ if (self.source.contents.len == self.start) {
+ found = "end of file";
}
+ self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
+ }
+
+ pub fn scanCommentText(lexer: *@This()) void {
+ var text = lexer.source.contents[lexer.start..lexer.end];
+ const has_preserve_annotation = text.len > 2 and text[2] == '!';
+ const is_multiline_comment = text[1] == '*';
- pub fn scanCommentText(lexer: *@This()) void {
- var text = lexer.source.contents[lexer.start..lexer.end];
- const has_preserve_annotation = text.len > 2 and text[2] == '!';
- const is_multiline_comment = text[1] == '*';
+ // Omit the trailing "*/" from the checks below
+ var endCommentText = text.len;
+ if (is_multiline_comment) {
+ endCommentText -= 2;
+ }
- // Omit the trailing "*/" from the checks below
- var endCommentText = text.len;
+ if (has_preserve_annotation or lexer.preserve_all_comments_before) {
if (is_multiline_comment) {
- endCommentText -= 2;
+ // text = lexer.removeMultilineCommentIndent(lexer.source.contents[0..lexer.start], text);
}
- if (has_preserve_annotation or lexer.preserve_all_comments_before) {
- if (is_multiline_comment) {
- // text = lexer.removeMultilineCommentIndent(lexer.source.contents[0..lexer.start], text);
- }
-
- lexer.comments_to_preserve_before.append(js_ast.G.Comment{
- .text = text,
- .loc = lexer.loc(),
- }) catch unreachable;
- }
+ lexer.comments_to_preserve_before.append(js_ast.G.Comment{
+ .text = text,
+ .loc = lexer.loc(),
+ }) catch unreachable;
}
+ }
- // TODO: implement this
- // it's too complicated to handle all the edgecases right now given the state of Zig's standard library
- pub fn removeMultilineCommentIndent(lexer: *@This(), _prefix: string, text: string) string {
- return text;
- }
+ // TODO: implement this
+ // it's too complicated to handle all the edgecases right now given the state of Zig's standard library
+ pub fn removeMultilineCommentIndent(lexer: *@This(), _prefix: string, text: string) string {
+ return text;
+ }
- pub fn range(self: *@This()) logger.Range {
- return logger.Range{
- .loc = logger.usize2Loc(self.start),
- .len = std.math.lossyCast(i32, self.end - self.start),
- };
- }
+ pub fn range(self: *@This()) logger.Range {
+ return logger.Range{
+ .loc = logger.usize2Loc(self.start),
+ .len = std.math.lossyCast(i32, self.end - self.start),
+ };
+ }
- pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
- var empty_string_literal: JavascriptString = undefined;
- var lex = @This(){
- .log = log,
- .source = source.*,
- .string_literal = empty_string_literal,
- .prev_error_loc = logger.Loc.Empty,
- .allocator = allocator,
- .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
- };
- lex.step();
- lex.next();
-
- return lex;
- }
+ pub fn initGlobalName(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+ var empty_string_literal: JavascriptString = undefined;
+ var lex = @This(){
+ .log = log,
+ .source = source.*,
+ .string_literal = empty_string_literal,
+ .prev_error_loc = logger.Loc.Empty,
+ .allocator = allocator,
+ .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
+ .for_global_name = true,
+ };
+ lex.step();
+ lex.next();
- pub fn scanRegExp(lexer: *@This()) void {
- while (true) {
- switch (lexer.code_point) {
- '/' => {
- lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
- switch (lexer.code_point) {
- 'g', 'i', 'm', 's', 'u', 'y' => {
- lexer.step();
- },
- else => {
- lexer.syntaxError();
- },
- }
- }
- },
- '[' => {
- lexer.step();
- while (lexer.code_point != ']') {
- lexer.scanRegExpValidateAndStep();
- }
- lexer.step();
- },
- else => {
- lexer.scanRegExpValidateAndStep();
- },
- }
- }
- }
+ return lex;
+ }
- // TODO: use wtf-8 encoding.
- pub fn stringToUTF16(lexer: *@This(), str: string) JavascriptString {
- var buf: JavascriptString = lexer.allocator.alloc(u16, std.mem.len(str)) catch unreachable;
- var i: usize = 0;
- // theres prob a faster/better way
- for (str) |char| {
- buf[i] = char;
- i += 1;
- }
+ pub fn initTSConfig(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+ var empty_string_literal: JavascriptString = undefined;
+ var lex = @This(){
+ .log = log,
+ .source = source.*,
+ .string_literal = empty_string_literal,
+ .prev_error_loc = logger.Loc.Empty,
+ .allocator = allocator,
+ .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
+ .json_options = JSONOptions{
+ .allow_comments = true,
+ .allow_trailing_commas = true,
+ },
+ };
+ lex.step();
+ lex.next();
- return buf;
- }
+ return lex;
+ }
- // TODO: use wtf-8 encoding.
- pub fn utf16ToStringWithValidation(lexer: *@This(), js: JavascriptString) !string {
- return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js);
- }
+ pub fn initJSON(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+ var empty_string_literal: JavascriptString = undefined;
+ var lex = @This(){
+ .log = log,
+ .source = source.*,
+ .string_literal = empty_string_literal,
+ .prev_error_loc = logger.Loc.Empty,
+ .allocator = allocator,
+ .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
+ .json_options = JSONOptions{
+ .allow_comments = false,
+ .allow_trailing_commas = false,
+ },
+ };
+ lex.step();
+ lex.next();
- // TODO: use wtf-8 encoding.
- pub fn utf16ToString(lexer: *@This(), js: JavascriptString) string {
- return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable;
- }
+ return lex;
+ }
- pub fn nextInsideJSXElement() void {
- std.debug.panic("JSX not implemented yet.", .{});
- }
+ pub fn init(log: *logger.Log, source: *logger.Source, allocator: *std.mem.Allocator) !@This() {
+ var empty_string_literal: JavascriptString = undefined;
+ var lex = @This(){
+ .log = log,
+ .source = source.*,
+ .string_literal = empty_string_literal,
+ .prev_error_loc = logger.Loc.Empty,
+ .allocator = allocator,
+ .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator),
+ };
+ lex.step();
+ lex.next();
- fn scanRegExpValidateAndStep(lexer: *@This()) void {
- if (lexer.code_point == '\\') {
- lexer.step();
- }
+ return lex;
+ }
+ pub fn scanRegExp(lexer: *@This()) void {
+ while (true) {
switch (lexer.code_point) {
- '\r', '\n', 0x2028, 0x2029 => {
- // Newlines aren't allowed in regular expressions
- lexer.syntaxError();
+ '/' => {
+ lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ switch (lexer.code_point) {
+ 'g', 'i', 'm', 's', 'u', 'y' => {
+ lexer.step();
+ },
+ else => {
+ lexer.syntaxError();
+ },
+ }
+ }
},
- -1 => { // EOF
- lexer.syntaxError();
+ '[' => {
+ lexer.step();
+ while (lexer.code_point != ']') {
+ lexer.scanRegExpValidateAndStep();
+ }
+ lexer.step();
},
else => {
- lexer.step();
+ lexer.scanRegExpValidateAndStep();
},
}
}
+ }
- pub fn rescanCloseBraceAsTemplateToken(lexer: *@This()) void {
- if (lexer.token != .t_close_brace) {
- lexer.expected(.t_close_brace);
- }
+ // TODO: use wtf-8 encoding.
+ pub fn stringToUTF16(lexer: *@This(), str: string) JavascriptString {
+ var buf: JavascriptString = lexer.allocator.alloc(u16, std.mem.len(str)) catch unreachable;
+ var i: usize = 0;
+ // theres prob a faster/better way
+ for (str) |char| {
+ buf[i] = char;
+ i += 1;
+ }
- lexer.rescan_close_brace_as_template_token = true;
- lexer.code_point = '`';
- lexer.current = lexer.end;
- lexer.end -= 1;
- lexer.next();
- lexer.rescan_close_brace_as_template_token = false;
+ return buf;
+ }
+
+ // TODO: use wtf-8 encoding.
+ pub fn utf16ToStringWithValidation(lexer: *@This(), js: JavascriptString) !string {
+ return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js);
+ }
+
+ // TODO: use wtf-8 encoding.
+ pub fn utf16ToString(lexer: *@This(), js: JavascriptString) string {
+ return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable;
+ }
+
+ pub fn nextInsideJSXElement() void {
+ std.debug.panic("JSX not implemented yet.", .{});
+ }
+
+ fn scanRegExpValidateAndStep(lexer: *@This()) void {
+ if (lexer.code_point == '\\') {
+ lexer.step();
}
- pub fn rawTemplateContents(lexer: *@This()) string {
- var text: string = undefined;
+ switch (lexer.code_point) {
+ '\r', '\n', 0x2028, 0x2029 => {
+ // Newlines aren't allowed in regular expressions
+ lexer.syntaxError();
+ },
+ -1 => { // EOF
+ lexer.syntaxError();
+ },
+ else => {
+ lexer.step();
+ },
+ }
+ }
- switch (lexer.token) {
- .t_no_substitution_template_literal, .t_template_tail => {
- text = lexer.source.contents[lexer.start + 1 .. lexer.end - 1];
- },
- .t_template_middle, .t_template_head => {
- text = lexer.source.contents[lexer.start + 1 .. lexer.end - 2];
- },
- else => {},
- }
+ pub fn rescanCloseBraceAsTemplateToken(lexer: *@This()) void {
+ if (lexer.token != .t_close_brace) {
+ lexer.expected(.t_close_brace);
+ }
- if (strings.indexOfChar(text, '\r') == null) {
- return text;
- }
+ lexer.rescan_close_brace_as_template_token = true;
+ lexer.code_point = '`';
+ lexer.current = lexer.end;
+ lexer.end -= 1;
+ lexer.next();
+ lexer.rescan_close_brace_as_template_token = false;
+ }
- // From the specification:
- //
- // 11.8.6.1 Static Semantics: TV and TRV
- //
- // TV excludes the code units of LineContinuation while TRV includes
- // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
- // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
- // include a <CR> or <CR><LF> sequence.
- var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable;
- var end: usize = 0;
- var i: usize = 0;
- var c: u8 = '0';
- while (i < bytes.list.items.len) {
- c = bytes.list.items[i];
- i += 1;
+ pub fn rawTemplateContents(lexer: *@This()) string {
+ var text: string = undefined;
+
+ switch (lexer.token) {
+ .t_no_substitution_template_literal, .t_template_tail => {
+ text = lexer.source.contents[lexer.start + 1 .. lexer.end - 1];
+ },
+ .t_template_middle, .t_template_head => {
+ text = lexer.source.contents[lexer.start + 1 .. lexer.end - 2];
+ },
+ else => {},
+ }
- if (c == '\r') {
- // Convert '\r\n' into '\n'
- if (i < bytes.list.items.len and bytes.list.items[i] == '\n') {
- i += 1;
- }
+ if (strings.indexOfChar(text, '\r') == null) {
+ return text;
+ }
- // Convert '\r' into '\n'
- c = '\n';
+ // From the specification:
+ //
+ // 11.8.6.1 Static Semantics: TV and TRV
+ //
+ // TV excludes the code units of LineContinuation while TRV includes
+ // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
+ // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
+ // include a <CR> or <CR><LF> sequence.
+ var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable;
+ var end: usize = 0;
+ var i: usize = 0;
+ var c: u8 = '0';
+ while (i < bytes.list.items.len) {
+ c = bytes.list.items[i];
+ i += 1;
+
+ if (c == '\r') {
+ // Convert '\r\n' into '\n'
+ if (i < bytes.list.items.len and bytes.list.items[i] == '\n') {
+ i += 1;
}
- bytes.list.items[end] = c;
- end += 1;
+ // Convert '\r' into '\n'
+ c = '\n';
}
- return bytes.toOwnedSliceLength(end + 1);
+ bytes.list.items[end] = c;
+ end += 1;
}
- fn parseNumericLiteralOrDot(lexer: *@This()) void {
- // Number or dot;
- var first = lexer.code_point;
- lexer.step();
-
- // Dot without a digit after it;
- if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) {
- // "..."
- if ((lexer.code_point == '.' and
- lexer.current < lexer.source.contents.len) and
- lexer.source.contents[lexer.current] == '.')
- {
- lexer.step();
- lexer.step();
- lexer.token = T.t_dot_dot_dot;
- return;
- }
+ return bytes.toOwnedSliceLength(end + 1);
+ }
- // "."
- lexer.token = T.t_dot;
+ fn parseNumericLiteralOrDot(lexer: *@This()) void {
+ // Number or dot;
+ var first = lexer.code_point;
+ lexer.step();
+
+ // Dot without a digit after it;
+ if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) {
+ // "..."
+ if ((lexer.code_point == '.' and
+ lexer.current < lexer.source.contents.len) and
+ lexer.source.contents[lexer.current] == '.')
+ {
+ lexer.step();
+ lexer.step();
+ lexer.token = T.t_dot_dot_dot;
return;
}
- var underscoreCount: usize = 0;
- var lastUnderscoreEnd: usize = 0;
- var hasDotOrExponent = first == '.';
- var base: f32 = 0.0;
- lexer.is_legacy_octal_literal = false;
+ // "."
+ lexer.token = T.t_dot;
+ return;
+ }
+
+ var underscoreCount: usize = 0;
+ var lastUnderscoreEnd: usize = 0;
+ var hasDotOrExponent = first == '.';
+ var base: f32 = 0.0;
+ lexer.is_legacy_octal_literal = false;
+
+ // Assume this is a number, but potentially change to a bigint later;
+ lexer.token = T.t_numeric_literal;
+
+ // Check for binary, octal, or hexadecimal literal;
+ if (first == '0') {
+ switch (lexer.code_point) {
+ 'b', 'B' => {
+ base = 2;
+ },
+
+ 'o', 'O' => {
+ base = 8;
+ },
+
+ 'x', 'X' => {
+ base = 16;
+ },
+
+ '0'...'7', '_' => {
+ base = 8;
+ lexer.is_legacy_octal_literal = true;
+ },
+ else => {},
+ }
+ }
- // Assume this is a number, but potentially change to a bigint later;
- lexer.token = T.t_numeric_literal;
+ if (base != 0) {
+ // Integer literal;
+ var isFirst = true;
+ var isInvalidLegacyOctalLiteral = false;
+ lexer.number = 0;
+ if (!lexer.is_legacy_octal_literal) {
+ lexer.step();
+ }
- // Check for binary, octal, or hexadecimal literal;
- if (first == '0') {
+ integerLiteral: while (true) {
switch (lexer.code_point) {
- 'b', 'B' => {
- base = 2;
+ '_' => {
+ // Cannot have multiple underscores in a row;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
+ lexer.syntaxError();
+ }
+
+ // The first digit must exist;
+ if (isFirst or lexer.is_legacy_octal_literal) {
+ lexer.syntaxError();
+ }
+
+ lastUnderscoreEnd = lexer.end;
+ underscoreCount += 1;
},
- 'o', 'O' => {
- base = 8;
+ '0', '1' => {
+ lexer.number = lexer.number * base + float64(lexer.code_point - '0');
},
- 'x', 'X' => {
- base = 16;
+ '2', '3', '4', '5', '6', '7' => {
+ if (base == 2) {
+ lexer.syntaxError();
+ }
+ lexer.number = lexer.number * base + float64(lexer.code_point - '0');
+ },
+ '8', '9' => {
+ if (lexer.is_legacy_octal_literal) {
+ isInvalidLegacyOctalLiteral = true;
+ } else if (base < 10) {
+ lexer.syntaxError();
+ }
+ lexer.number = lexer.number * base + float64(lexer.code_point - '0');
+ },
+ 'A', 'B', 'C', 'D', 'E', 'F' => {
+ if (base != 16) {
+ lexer.syntaxError();
+ }
+ lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A');
},
- '0'...'7', '_' => {
- base = 8;
- lexer.is_legacy_octal_literal = true;
+ 'a', 'b', 'c', 'd', 'e', 'f' => {
+ if (base != 16) {
+ lexer.syntaxError();
+ }
+ lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'a');
},
- else => {},
- }
- }
+ else => {
+ // The first digit must exist;
+ if (isFirst) {
+ lexer.syntaxError();
+ }
- if (base != 0) {
- // Integer literal;
- var isFirst = true;
- var isInvalidLegacyOctalLiteral = false;
- lexer.number = 0;
- if (!lexer.is_legacy_octal_literal) {
- lexer.step();
+ break :integerLiteral;
+ },
}
- integerLiteral: while (true) {
- switch (lexer.code_point) {
- '_' => {
- // Cannot have multiple underscores in a row;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.syntaxError();
- }
-
- // The first digit must exist;
- if (isFirst or lexer.is_legacy_octal_literal) {
- lexer.syntaxError();
- }
-
- lastUnderscoreEnd = lexer.end;
- underscoreCount += 1;
- },
+ lexer.step();
+ isFirst = false;
+ }
- '0', '1' => {
- lexer.number = lexer.number * base + float64(lexer.code_point - '0');
- },
+ var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent;
- '2', '3', '4', '5', '6', '7' => {
- if (base == 2) {
- lexer.syntaxError();
- }
- lexer.number = lexer.number * base + float64(lexer.code_point - '0');
- },
- '8', '9' => {
- if (lexer.is_legacy_octal_literal) {
- isInvalidLegacyOctalLiteral = true;
- } else if (base < 10) {
- lexer.syntaxError();
- }
- lexer.number = lexer.number * base + float64(lexer.code_point - '0');
- },
- 'A', 'B', 'C', 'D', 'E', 'F' => {
- if (base != 16) {
- lexer.syntaxError();
- }
- lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A');
- },
+ // Slow path: do we need to re-scan the input as text?
+ if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) {
+ var text = lexer.raw();
- 'a', 'b', 'c', 'd', 'e', 'f' => {
- if (base != 16) {
- lexer.syntaxError();
- }
- lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'a');
- },
- else => {
- // The first digit must exist;
- if (isFirst) {
- lexer.syntaxError();
- }
+ // Can't use a leading zero for bigint literals;
+ if (isBigIntegerLiteral and lexer.is_legacy_octal_literal) {
+ lexer.syntaxError();
+ }
- break :integerLiteral;
- },
+ // Filter out underscores;
+ if (underscoreCount > 0) {
+ var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable;
+ var i: usize = 0;
+ for (text) |char| {
+ if (char != '_') {
+ bytes[i] = char;
+ i += 1;
+ }
}
-
- lexer.step();
- isFirst = false;
}
- var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent;
+ // Store bigints as text to avoid precision loss;
+ if (isBigIntegerLiteral) {
+ lexer.identifier = text;
+ } else if (isInvalidLegacyOctalLiteral) {
+ if (std.fmt.parseFloat(f64, text)) |num| {
+ lexer.number = num;
+ } else |err| {
+ lexer.addError(lexer.start, "Invalid number {s}", .{text}, true);
+ }
+ }
+ }
+ } else {
+ // Floating-point literal;
+ var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9');
- // Slow path: do we need to re-scan the input as text?
- if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) {
- var text = lexer.raw();
+ // Initial digits;
+ while (true) {
+ if (lexer.code_point < '0' or lexer.code_point > '9') {
+ if (lexer.code_point != '_') {
+ break;
+ }
- // Can't use a leading zero for bigint literals;
- if (isBigIntegerLiteral and lexer.is_legacy_octal_literal) {
+ // Cannot have multiple underscores in a row;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
lexer.syntaxError();
}
- // Filter out underscores;
- if (underscoreCount > 0) {
- var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable;
- var i: usize = 0;
- for (text) |char| {
- if (char != '_') {
- bytes[i] = char;
- i += 1;
- }
- }
+ // The specification forbids underscores in this case;
+ if (isInvalidLegacyOctalLiteral) {
+ lexer.syntaxError();
}
- // Store bigints as text to avoid precision loss;
- if (isBigIntegerLiteral) {
- lexer.identifier = text;
- } else if (isInvalidLegacyOctalLiteral) {
- if (std.fmt.parseFloat(f64, text)) |num| {
- lexer.number = num;
- } else |err| {
- lexer.addError(lexer.start, "Invalid number {s}", .{text}, true);
- }
- }
+ lastUnderscoreEnd = lexer.end;
+ underscoreCount += 1;
}
- } else {
- // Floating-point literal;
- var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9');
+ lexer.step();
+ }
- // Initial digits;
+ // Fractional digits;
+ if (first != '.' and lexer.code_point == '.') {
+ // An underscore must not come last;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
+ lexer.end -= 1;
+ lexer.syntaxError();
+ }
+
+ hasDotOrExponent = true;
+ lexer.step();
+ if (lexer.code_point == '_') {
+ lexer.syntaxError();
+ }
while (true) {
if (lexer.code_point < '0' or lexer.code_point > '9') {
if (lexer.code_point != '_') {
@@ -1290,151 +1389,110 @@ pub fn NewLexerType(comptime jsonOptions: ?JSONOptions) type {
lexer.syntaxError();
}
- // The specification forbids underscores in this case;
- if (isInvalidLegacyOctalLiteral) {
- lexer.syntaxError();
- }
-
lastUnderscoreEnd = lexer.end;
underscoreCount += 1;
}
lexer.step();
}
+ }
- // Fractional digits;
- if (first != '.' and lexer.code_point == '.') {
- // An underscore must not come last;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.end -= 1;
- lexer.syntaxError();
- }
-
- hasDotOrExponent = true;
- lexer.step();
- if (lexer.code_point == '_') {
- lexer.syntaxError();
- }
- while (true) {
- if (lexer.code_point < '0' or lexer.code_point > '9') {
- if (lexer.code_point != '_') {
- break;
- }
-
- // Cannot have multiple underscores in a row;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.syntaxError();
- }
-
- lastUnderscoreEnd = lexer.end;
- underscoreCount += 1;
- }
- lexer.step();
- }
+ // Exponent;
+ if (lexer.code_point == 'e' or lexer.code_point == 'E') {
+ // An underscore must not come last;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
+ lexer.end -= 1;
+ lexer.syntaxError();
}
- // Exponent;
- if (lexer.code_point == 'e' or lexer.code_point == 'E') {
- // An underscore must not come last;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.end -= 1;
- lexer.syntaxError();
- }
-
- hasDotOrExponent = true;
+ hasDotOrExponent = true;
+ lexer.step();
+ if (lexer.code_point == '+' or lexer.code_point == '-') {
lexer.step();
- if (lexer.code_point == '+' or lexer.code_point == '-') {
- lexer.step();
- }
+ }
+ if (lexer.code_point < '0' or lexer.code_point > '9') {
+ lexer.syntaxError();
+ }
+ while (true) {
if (lexer.code_point < '0' or lexer.code_point > '9') {
- lexer.syntaxError();
- }
- while (true) {
- if (lexer.code_point < '0' or lexer.code_point > '9') {
- if (lexer.code_point != '_') {
- break;
- }
-
- // Cannot have multiple underscores in a row;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.syntaxError();
- }
+ if (lexer.code_point != '_') {
+ break;
+ }
- lastUnderscoreEnd = lexer.end;
- underscoreCount += 1;
+ // Cannot have multiple underscores in a row;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
+ lexer.syntaxError();
}
- lexer.step();
+
+ lastUnderscoreEnd = lexer.end;
+ underscoreCount += 1;
}
+ lexer.step();
}
+ }
- // Take a slice of the text to parse;
- var text = lexer.raw();
+ // Take a slice of the text to parse;
+ var text = lexer.raw();
- // Filter out underscores;
- if (underscoreCount > 0) {
- var i: usize = 0;
- if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| {
- for (text) |char| {
- if (char != '_') {
- bytes[i] = char;
- i += 1;
- }
+ // Filter out underscores;
+ if (underscoreCount > 0) {
+ var i: usize = 0;
+ if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| {
+ for (text) |char| {
+ if (char != '_') {
+ bytes[i] = char;
+ i += 1;
}
- text = bytes;
- } else |err| {
- lexer.addError(lexer.start, "Out of Memory Wah Wah Wah", .{}, true);
- return;
}
+ text = bytes;
+ } else |err| {
+ lexer.addError(lexer.start, "Out of Memory Wah Wah Wah", .{}, true);
+ return;
}
+ }
- if (lexer.code_point == 'n' and !hasDotOrExponent) {
- // The only bigint literal that can start with 0 is "0n"
- if (text.len > 1 and first == '0') {
- lexer.syntaxError();
- }
-
- // Store bigints as text to avoid precision loss;
- lexer.identifier = text;
- } else if (!hasDotOrExponent and lexer.end - lexer.start < 10) {
- // Parse a 32-bit integer (very fast path);
- var number: u32 = 0;
- for (text) |c| {
- number = number * 10 + @intCast(u32, c - '0');
- }
- lexer.number = @intToFloat(f64, number);
- } else {
- // Parse a double-precision floating-point number;
- if (std.fmt.parseFloat(f64, text)) |num| {
- lexer.number = num;
- } else |err| {
- lexer.addError(lexer.start, "Invalid number", .{}, true);
- }
+ if (lexer.code_point == 'n' and !hasDotOrExponent) {
+ // The only bigint literal that can start with 0 is "0n"
+ if (text.len > 1 and first == '0') {
+ lexer.syntaxError();
}
- }
- // An underscore must not come last;
- if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
- lexer.end -= 1;
- lexer.syntaxError();
+ // Store bigints as text to avoid precision loss;
+ lexer.identifier = text;
+ } else if (!hasDotOrExponent and lexer.end - lexer.start < 10) {
+ // Parse a 32-bit integer (very fast path);
+ var number: u32 = 0;
+ for (text) |c| {
+ number = number * 10 + @intCast(u32, c - '0');
+ }
+ lexer.number = @intToFloat(f64, number);
+ } else {
+ // Parse a double-precision floating-point number;
+ if (std.fmt.parseFloat(f64, text)) |num| {
+ lexer.number = num;
+ } else |err| {
+ lexer.addError(lexer.start, "Invalid number", .{}, true);
+ }
}
+ }
- // Handle bigint literals after the underscore-at-end check above;
- if (lexer.code_point == 'n' and !hasDotOrExponent) {
- lexer.token = T.t_big_integer_literal;
- lexer.step();
- }
+ // An underscore must not come last;
+ if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) {
+ lexer.end -= 1;
+ lexer.syntaxError();
+ }
- // Identifiers can't occur immediately after numbers;
- if (isIdentifierStart(lexer.code_point)) {
- lexer.syntaxError();
- }
+ // Handle bigint literals after the underscore-at-end check above;
+ if (lexer.code_point == 'n' and !hasDotOrExponent) {
+ lexer.token = T.t_big_integer_literal;
+ lexer.step();
}
- };
-}
-// JS/TS lexer
-pub const Lexer = NewLexerType(null);
-pub const JSONLexer = NewLexerType(JSONOptions{ .allow_comments = false, .allow_trailing_commas = false });
-pub const TSConfigJSONLexer = NewLexerType(JSONOptions{ .allow_comments = true, .allow_trailing_commas = true });
+ // Identifiers can't occur immediately after numbers;
+ if (isIdentifierStart(lexer.code_point)) {
+ lexer.syntaxError();
+ }
+ }
+};
pub fn isIdentifierStart(codepoint: CodePoint) bool {
switch (codepoint) {