From ae90b3deea27f16c1e9d3d791f1cf21e342e45b5 Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Sat, 8 May 2021 20:48:20 -0700 Subject: Use try for errors during parsing so that backtracking can happen Former-commit-id: 7d3b0e7daa374577b59f9b53c7663d767df2077d --- src/js_lexer.zig | 509 ++++++++--------- src/js_parser/js_parser.zig | 1263 ++++++++++++++++++++++--------------------- src/json_parser.zig | 42 +- 3 files changed, 932 insertions(+), 882 deletions(-) (limited to 'src') diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 0cae6b0b4..4c758c6a4 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -34,6 +34,15 @@ pub const JSONOptions = struct { pub const Lexer = struct { const LexerType = @This(); + pub const Error = error{ + UTF8Fail, + OutOfMemory, + SyntaxError, + UnexpectedSyntax, + JSONStringsMustUseDoubleQuotes, + ParserError, + }; + // pub const Error = error{ // UnexpectedToken, // EndOfFile, @@ -80,26 +89,28 @@ pub const Lexer = struct { return logger.usize2Loc(self.start); } - fn nextCodepointSlice(it: *LexerType) callconv(.Inline) ?[]const u8 { + fn nextCodepointSlice(it: *LexerType) callconv(.Inline) !?[]const u8 { if (it.current >= it.source.contents.len) { // without this line, strings cut off one before the last characte it.end = it.current; return null; } - const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch unreachable; + const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch return Error.UTF8Fail; it.end = it.current; it.current += cp_len; return it.source.contents[it.current - cp_len .. it.current]; } - pub fn syntaxError(self: *LexerType) void { + pub fn syntaxError(self: *LexerType) !void { self.addError(self.start, "Syntax Error!!", .{}, true); + return Error.SyntaxError; } - pub fn addDefaultError(self: *LexerType, msg: []const u8) void { + pub fn addDefaultError(self: *LexerType, msg: []const u8) !void { self.addError(self.start, "{s}", .{msg}, true); + return Error.SyntaxError; } pub fn addError(self: *LexerType, _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void { @@ -114,7 +125,7 @@ pub const Lexer = struct { msg.formatNoWriter(Global.panic); } - pub fn addRangeError(self: *LexerType, r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void { + pub fn addRangeError(self: *LexerType, r: logger.Range, comptime format: []const u8, args: anytype, panic: bool) !void { if (self.prev_error_loc.eql(r.loc)) { return; } @@ -124,12 +135,7 @@ pub const Lexer = struct { self.prev_error_loc = r.loc; if (panic) { - var fixedBuffer = [_]u8{0} ** 8096; - var stream = std.io.fixedBufferStream(&fixedBuffer); - const writer = stream.writer(); - self.log.print(writer) catch unreachable; - - Global.panic("{s}", .{fixedBuffer[0..stream.pos]}); + return Error.ParserError; } } @@ -145,8 +151,8 @@ pub const Lexer = struct { return @intCast(CodePoint, a) == self.code_point; } - fn nextCodepoint(it: *LexerType) callconv(.Inline) CodePoint { - const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1); + fn nextCodepoint(it: *LexerType) callconv(.Inline) !CodePoint { + const slice = (try it.nextCodepointSlice()) orelse return @as(CodePoint, -1); switch (slice.len) { 1 => return @as(CodePoint, slice[0]), @@ -159,14 +165,14 @@ pub const Lexer = struct { /// Look ahead at the next n codepoints without advancing the iterator. /// If fewer than n codepoints are available, then return the remainder of the string. 
- fn peek(it: *LexerType, n: usize) []const u8 { + fn peek(it: *LexerType, n: usize) !string { const original_i = it.current; defer it.current = original_i; var end_ix = original_i; var found: usize = 0; while (found < n) : (found += 1) { - const next_codepoint = it.nextCodepointSlice() orelse return it.source.contents[original_i..]; + const next_codepoint = (try it.nextCodepointSlice()) orelse return it.source.contents[original_i..]; end_ix += next_codepoint.len; } @@ -185,7 +191,7 @@ pub const Lexer = struct { } } - fn parseStringLiteral(lexer: *LexerType) void { + fn parseStringLiteral(lexer: *LexerType) !void { var quote: CodePoint = lexer.code_point; var needs_slow_path = false; var suffixLen: usize = 1; @@ -197,19 +203,19 @@ pub const Lexer = struct { } else { lexer.token = T.t_no_substitution_template_literal; } - lexer.step(); + try lexer.step(); stringLiteral: while (true) { switch (lexer.code_point) { '\\' => { needs_slow_path = true; - lexer.step(); + try lexer.step(); // Handle Windows CRLF if (lexer.code_point == '\r' and lexer.json_options != null) { - lexer.step(); + try lexer.step(); if (lexer.code_point == '\n') { - lexer.step(); + try lexer.step(); } continue :stringLiteral; } @@ -217,12 +223,12 @@ pub const Lexer = struct { // This indicates the end of the file -1 => { - lexer.addDefaultError("Unterminated string literal"); + try lexer.addDefaultError("Unterminated string literal"); }, '\r' => { if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); + try lexer.addDefaultError("Unterminated string literal"); } // Template literals require newline normalization @@ -231,16 +237,16 @@ pub const Lexer = struct { '\n' => { if (quote != '`') { - lexer.addDefaultError("Unterminated string literal"); + try lexer.addDefaultError("Unterminated string literal"); } }, '$' => { if (quote == '`') { - lexer.step(); + try lexer.step(); if (lexer.code_point == '{') { suffixLen = 2; - lexer.step(); + try lexer.step(); if (lexer.rescan_close_brace_as_template_token) { lexer.token = T.t_template_middle; } else { @@ -254,18 +260,18 @@ pub const Lexer = struct { else => { if (quote == lexer.code_point) { - lexer.step(); + try lexer.step(); break :stringLiteral; } // Non-ASCII strings need the slow path if (lexer.code_point >= 0x80) { needs_slow_path = true; } else if (lexer.json_options != null and lexer.code_point < 0x20) { - lexer.syntaxError(); + try lexer.syntaxError(); } }, } - lexer.step(); + try lexer.step(); } // Reset string literal @@ -283,7 +289,7 @@ pub const Lexer = struct { } if (quote == '\'' and lexer.json_options != null) { - lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); + try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); } // for (text) // // if (needs_slow_path) { @@ -296,8 +302,8 @@ pub const Lexer = struct { // // } } - fn step(lexer: *LexerType) void { - lexer.code_point = lexer.nextCodepoint(); + fn step(lexer: *LexerType) !void { + lexer.code_point = try lexer.nextCodepoint(); // Track the approximate number of newlines in the file so we can preallocate // the line offset table in the printer for source maps. 
The line offset table @@ -310,29 +316,29 @@ pub const Lexer = struct { } } - pub fn expect(self: *LexerType, comptime token: T) void { + pub fn expect(self: *LexerType, comptime token: T) !void { if (self.token != token) { - self.expected(token); + try self.expected(token); } - self.next(); + try self.next(); } - pub fn expectOrInsertSemicolon(lexer: *LexerType) void { + pub fn expectOrInsertSemicolon(lexer: *LexerType) !void { if (lexer.token == T.t_semicolon or (!lexer.has_newline_before and lexer.token != T.t_close_brace and lexer.token != T.t_end_of_file)) { - lexer.expect(T.t_semicolon); + try lexer.expect(T.t_semicolon); } } - pub fn addUnsupportedSyntaxError(self: *LexerType, msg: []const u8) void { + pub fn addUnsupportedSyntaxError(self: *LexerType, msg: []const u8) !void { self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true); + return Error.SyntaxError; } - pub fn scanIdentifierWithEscapes(self: *LexerType) void { - self.addUnsupportedSyntaxError("escape sequence"); - return; + pub fn scanIdentifierWithEscapes(self: *LexerType) !void { + try self.addUnsupportedSyntaxError("escape sequence"); } pub fn debugInfo(self: *LexerType) void { @@ -348,7 +354,7 @@ pub const Lexer = struct { } } - pub fn expectContextualKeyword(self: *LexerType, comptime keyword: string) void { + pub fn expectContextualKeyword(self: *LexerType, comptime keyword: string) !void { if (!self.isContextualKeyword(keyword)) { if (std.builtin.mode == std.builtin.Mode.Debug) { self.addError(self.start, "Expected \"{s}\" but found \"{s}\" (token: {s})", .{ @@ -359,45 +365,46 @@ pub const Lexer = struct { } else { self.addError(self.start, "Expected \"{s}\" but found \"{s}\"", .{ keyword, self.raw() }, true); } + return Error.UnexpectedSyntax; } - self.next(); + try self.next(); } - pub fn maybeExpandEquals(lexer: *LexerType) void { + pub fn maybeExpandEquals(lexer: *LexerType) !void { switch (lexer.code_point) { '>' => { // "=" + ">" = "=>" lexer.token = .t_equals_greater_than; - lexer.step(); + try lexer.step(); }, '=' => { // "=" + "=" = "==" lexer.token = .t_equals_equals; - lexer.step(); + try lexer.step(); if (lexer.code_point == '=') { // "=" + "==" = "===" lexer.token = .t_equals_equals_equals; - lexer.step(); + try lexer.step(); } }, else => {}, } } - pub fn expectLessThan(lexer: *LexerType, is_inside_jsx_element: bool) void { + pub fn expectLessThan(lexer: *LexerType, is_inside_jsx_element: bool) !void { switch (lexer.token) { .t_less_than => { if (is_inside_jsx_element) { - lexer.nextInsideJSXElement(); + try lexer.nextInsideJSXElement(); } else { - lexer.next(); + try lexer.next(); } }, .t_less_than_equals => { lexer.token = .t_equals; lexer.start += 1; - lexer.maybeExpandEquals(); + try lexer.maybeExpandEquals(); }, .t_less_than_less_than => { lexer.token = .t_less_than; @@ -408,7 +415,7 @@ pub const Lexer = struct { lexer.start += 1; }, else => { - lexer.expected(.t_less_than); + try lexer.expected(.t_less_than); }, } } @@ -419,13 +426,13 @@ pub const Lexer = struct { if (is_inside_jsx_element) { try lexer.nextInsideJSXElement(); } else { - lexer.next(); + try lexer.next(); } }, .t_greater_than_equals => { lexer.token = .t_equals; lexer.start += 1; - lexer.maybeExpandEquals(); + try lexer.maybeExpandEquals(); }, .t_greater_than_greater_than => { lexer.token = .t_greater_than; @@ -440,12 +447,12 @@ pub const Lexer = struct { lexer.start += 1; }, else => { - lexer.expected(.t_greater_than); + try lexer.expected(.t_greater_than); }, } } - pub fn next(lexer: *LexerType) void { + pub fn 
next(lexer: *LexerType) !void { lexer.has_newline_before = lexer.end == 0; lex: while (true) { @@ -459,23 +466,23 @@ pub const Lexer = struct { '#' => { if (lexer.start == 0 and lexer.source.contents[1] == '!') { - lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); + try lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); return; } - lexer.step(); + try lexer.step(); if (!isIdentifierStart(lexer.code_point)) { - lexer.syntaxError(); + try lexer.syntaxError(); } - lexer.step(); + try lexer.step(); if (isIdentifierStart(lexer.code_point)) { - lexer.step(); + try lexer.step(); while (isIdentifierContinue(lexer.code_point)) { - lexer.step(); + try lexer.step(); } if (lexer.code_point == '\\') { - lexer.scanIdentifierWithEscapes(); + try lexer.scanIdentifierWithEscapes(); lexer.token = T.t_private_identifier; // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); } else { @@ -486,67 +493,67 @@ pub const Lexer = struct { } }, '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); + try lexer.step(); lexer.has_newline_before = true; continue; }, '\t', ' ' => { - lexer.step(); + try lexer.step(); continue; }, '(' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_open_paren; }, ')' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_close_paren; }, '[' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_open_bracket; }, ']' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_close_bracket; }, '{' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_open_brace; }, '}' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_close_brace; }, ',' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_comma; }, ':' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_colon; }, ';' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_semicolon; }, '@' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_at; }, '~' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_tilde; }, '?' => { // '?' or '?.' or '??' or '??=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '?' 
=> { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_question_question_equals; }, else => { @@ -564,7 +571,7 @@ pub const Lexer = struct { if (current < contents.len) { const c = contents[current]; if (c < '0' or c > '9') { - lexer.step(); + try lexer.step(); lexer.token = T.t_question_dot; } } @@ -576,10 +583,10 @@ pub const Lexer = struct { }, '%' => { // '%' or '%=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_percent_equals; }, @@ -591,18 +598,18 @@ pub const Lexer = struct { '&' => { // '&' or '&=' or '&&' or '&&=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_ampersand_equals; }, '&' => { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_ampersand_ampersand_equals; }, @@ -620,17 +627,17 @@ pub const Lexer = struct { '|' => { // '|' or '|=' or '||' or '||=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_bar_equals; }, '|' => { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_bar_bar_equals; }, @@ -647,10 +654,10 @@ pub const Lexer = struct { '^' => { // '^' or '^=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_caret_equals; }, @@ -662,15 +669,15 @@ pub const Lexer = struct { '+' => { // '+' or '+=' or '++' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_plus_equals; }, '+' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_plus_plus; }, @@ -682,18 +689,18 @@ pub const Lexer = struct { '-' => { // '+' or '+=' or '++' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_minus_equals; }, '-' => { - lexer.step(); + try lexer.step(); if (lexer.code_point == '>' and lexer.has_newline_before) { - lexer.step(); + try lexer.step(); lexer.log.addRangeWarning(lexer.source, lexer.range(), "Treating \"-->\" as the start of a legacy HTML single-line comment") catch unreachable; singleLineHTMLCloseComment: while (true) { @@ -706,7 +713,7 @@ pub const Lexer = struct { }, else => {}, } - lexer.step(); + try lexer.step(); } continue; } @@ -723,17 +730,17 @@ pub const Lexer = struct { '*' => { // '*' or '*=' or '**' or '**=' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = .t_asterisk_equals; }, '*' => { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = .t_asterisk_asterisk_equals; }, else => { @@ -748,7 +755,7 @@ pub const Lexer = struct { }, '/' => { // '/' or '/=' or '//' or '/* ... 
*/' - lexer.step(); + try lexer.step(); if (lexer.for_global_name) { lexer.token = .t_slash; @@ -756,13 +763,13 @@ pub const Lexer = struct { } switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = .t_slash_equals; }, '/' => { - lexer.step(); + try lexer.step(); singleLineComment: while (true) { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '\r', '\n', 0x2028, 0x2029 => { break :singleLineComment; @@ -776,7 +783,7 @@ pub const Lexer = struct { if (lexer.json_options) |json| { if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); + try lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); return; } } @@ -784,19 +791,19 @@ pub const Lexer = struct { continue; }, '*' => { - lexer.step(); + try lexer.step(); multiLineComment: while (true) { switch (lexer.code_point) { '*' => { - lexer.step(); + try lexer.step(); if (lexer.code_point == '/') { - lexer.step(); + try lexer.step(); break :multiLineComment; } }, '\r', '\n', 0x2028, 0x2029 => { - lexer.step(); + try lexer.step(); lexer.has_newline_before = true; }, -1 => { @@ -804,13 +811,13 @@ pub const Lexer = struct { lexer.addError(lexer.start, "Expected \"*/\" to terminate multi-line comment", .{}, true); }, else => { - lexer.step(); + try lexer.step(); }, } } if (lexer.json_options) |json| { if (!json.allow_comments) { - lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); + try lexer.addRangeError(lexer.range(), "JSON does not support comments", .{}, true); return; } } @@ -825,18 +832,18 @@ pub const Lexer = struct { '=' => { // '=' or '=>' or '==' or '===' - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '>' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_equals_greater_than; }, '=' => { - lexer.step(); + try lexer.step(); switch (lexer.code_point) { '=' => { - lexer.step(); + try lexer.step(); lexer.token = T.t_equals_equals_equals; }, @@ -854,18 +861,18 @@ pub const Lexer = struct { '<' => { // '<' or '<<' or '<=' or '<<=' or '
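
The mechanical change above repeats through the rest of the diff: lexer methods grow a `!` in their return type, every call site gains a `try`, and the error-reporting helpers return after logging instead of unconditionally aborting (for example, `addRangeError` now records the diagnostic and returns `Error.ParserError` where it previously formatted the log into a fixed buffer and called `Global.panic`). The payoff named in the subject line is that a caller can catch a failed speculative scan and rewind instead of killing the process. Below is a minimal, self-contained sketch of that pattern; `MiniLexer` and `parseParenOrBracket` are hypothetical names for illustration only, not code from this repository:

const std = @import("std");

// A toy lexer whose methods return an error set instead of panicking,
// mirroring the shape of the `Lexer.Error` set introduced in this commit.
const MiniLexer = struct {
    const Error = error{SyntaxError};

    contents: []const u8,
    current: usize = 0,

    // Consume one byte, or fail recoverably at end of input.
    fn step(self: *MiniLexer) Error!u8 {
        if (self.current >= self.contents.len) return Error.SyntaxError;
        const c = self.contents[self.current];
        self.current += 1;
        return c;
    }

    // Expect an exact byte; `try` propagates the failure to the caller.
    fn expect(self: *MiniLexer, byte: u8) Error!void {
        if ((try self.step()) != byte) return Error.SyntaxError;
    }
};

// Backtracking becomes possible: save the position, attempt one branch,
// and on error restore the position and try the alternative. With a
// panicking lexer, the first failed `expect` would have aborted instead.
fn parseParenOrBracket(lexer: *MiniLexer) MiniLexer.Error!u8 {
    const saved = lexer.current;
    lexer.expect('(') catch {
        lexer.current = saved; // rewind before rescanning
        try lexer.expect('[');
        return '[';
    };
    return '(';
}

test "failed speculation rewinds instead of panicking" {
    var lexer = MiniLexer{ .contents = "[" };
    try std.testing.expectEqual(@as(u8, '['), try parseParenOrBracket(&lexer));
}

The same shape explains why `syntaxError`, `addDefaultError`, and `addRangeError` return an error after logging: the diagnostic is recorded once (guarded by `prev_error_loc`), while control flows back to whichever caller is positioned to recover.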