diff options
Diffstat (limited to '')
-rw-r--r-- | src/js_lexer.zig | 99 |
1 files changed, 68 insertions, 31 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 687d20409..2cfc0f732 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -29,6 +29,7 @@ pub var emptyJavaScriptString = ([_]u16{0}); pub const JSONOptions = struct { allow_comments: bool = false, allow_trailing_commas: bool = false, + starts_with_string: bool = false, }; pub const Lexer = struct { @@ -524,7 +525,7 @@ pub const Lexer = struct { pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool }; fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral { var needs_slow_path = false; - var suffix_len: u3 = 1; + var suffix_len: u3 = if (comptime quote == 0) 0 else 1; stringLiteral: while (true) { @setRuntimeSafety(false); @@ -533,7 +534,7 @@ pub const Lexer = struct { try lexer.step(); // Handle Windows CRLF - if (lexer.code_point == '\r' and lexer.json_options != null) { + if (lexer.code_point == 'r' and lexer.json_options != null) { try lexer.step(); if (lexer.code_point == '\n') { try lexer.step(); @@ -541,17 +542,24 @@ pub const Lexer = struct { continue :stringLiteral; } - // Skip slow path for a handful of common escaped characters that don't need UTf16 handling - needs_slow_path = switch (lexer.code_point) { - // if it was previously marked as needing slow path, then keep it - 'n', '`', '\'', '0', '"' => needs_slow_path, - else => true, - }; + switch (lexer.code_point) { + 't', 'r', 'n', '`', '\'', '0', '"', 0x2028, 0x2029 => { + try lexer.step(); + continue :stringLiteral; + }, + else => { + needs_slow_path = true; + }, + } }, // This indicates the end of the file -1 => { - try lexer.addDefaultError("Unterminated string literal"); + if (comptime quote != 0) { + try lexer.addDefaultError("Unterminated string literal"); + } + + break :stringLiteral; }, '\r' => { @@ -564,8 +572,17 @@ pub const Lexer = struct { }, '\n' => { - if (comptime quote != '`') { - try lexer.addDefaultError("Unterminated string literal"); + + // Implicitly-quoted strings end when they reach a newline OR end of file + // This only applies to .env + switch (comptime quote) { + 0 => { + break :stringLiteral; + }, + '`' => {}, + else => { + try lexer.addDefaultError("Unterminated string literal"); + }, } }, @@ -608,27 +625,23 @@ pub const Lexer = struct { return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len }; } - fn parseStringLiteral(lexer: *LexerType) !void { - var quote: CodePoint = lexer.code_point; - - if (quote != '`') { + pub fn parseStringLiteral(lexer: *LexerType, comptime quote: CodePoint) !void { + if (comptime quote != '`') { lexer.token = T.t_string_literal; } else if (lexer.rescan_close_brace_as_template_token) { lexer.token = T.t_template_tail; } else { lexer.token = T.t_no_substitution_template_literal; } + // quote is 0 when parsing JSON from .env + // .env values may not always be quoted. try lexer.step(); - var string_literal_details = switch (quote) { - '`' => try lexer.parseStringLiteralInnter('`'), - '\'' => try lexer.parseStringLiteralInnter('\''), - '"' => try lexer.parseStringLiteralInnter('"'), - else => unreachable, - }; + var string_literal_details = try lexer.parseStringLiteralInnter(quote); // Reset string literal - lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - string_literal_details.suffix_len]; + const base = if (comptime quote == 0) lexer.start else lexer.start + 1; + lexer.string_literal_slice = lexer.source.contents[base .. lexer.end - string_literal_details.suffix_len]; lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path; lexer.string_literal_buffer.shrinkRetainingCapacity(0); if (string_literal_details.needs_slow_path) { @@ -1499,8 +1512,14 @@ pub const Lexer = struct { } }, - '\'', '"', '`' => { - try lexer.parseStringLiteral(); + '\'' => { + try lexer.parseStringLiteral('\''); + }, + '"' => { + try lexer.parseStringLiteral('"'); + }, + '`' => { + try lexer.parseStringLiteral('`'); }, '_', '$', 'a'...'z', 'A'...'Z' => { @@ -1778,7 +1797,6 @@ pub const Lexer = struct { return utf16ToString(lexer, js); } - // TODO: use wtf-8 encoding. pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) string { var temp: [4]u8 = undefined; var list = std.ArrayList(u8).initCapacity(lexer.allocator, js.len) catch unreachable; @@ -2595,16 +2613,32 @@ pub const Lexer = struct { pub fn isIdentifierStart(codepoint: CodePoint) bool { @setRuntimeSafety(false); + switch (codepoint) { + 'a'...'z', 'A'...'Z', '_', '$' => return true, + else => {}, + } + + if (codepoint < 127) return false; + return switch (codepoint) { - 'a'...'z', 'A'...'Z', '_', '$' => true, + std.math.max(tables.id_start.r16_min, tables.id_start.latin_offset)...tables.id_start.r16_max => tables.id_start.inRange16(@intCast(u16, codepoint)), + tables.id_start.r32_min...tables.id_start.r32_max => tables.id_start.inRange32(@intCast(u32, codepoint)), else => false, }; } pub fn isIdentifierContinue(codepoint: CodePoint) bool { @setRuntimeSafety(false); + switch (codepoint) { + 'a'...'z', 'A'...'Z', '_', '$', '0'...'9' => return true, + else => {}, + } + + if (codepoint < 127) return false; + return switch (codepoint) { - 'a'...'z', 'A'...'Z', '_', '$', '0'...'9', 0x200C, 0x200D => true, + std.math.max(tables.id_continue.r16_min, tables.id_continue.latin_offset)...tables.id_continue.r16_max => tables.id_continue.inRange16(@intCast(u16, codepoint)), + tables.id_continue.r32_min...tables.id_continue.r32_max => tables.id_continue.inRange32(@intCast(u32, codepoint)), else => false, }; } @@ -2643,13 +2677,16 @@ pub fn isIdentifier(text: string) bool { return false; } - var iter = std.unicode.Utf8Iterator{ .bytes = text, .i = 0 }; - if (!isIdentifierStart(iter.nextCodepoint() orelse unreachable)) { + var iter = strings.CodepointIterator{ .i = 0, .bytes = text }; + + if (!isIdentifierStart(iter.nextCodepoint())) { return false; } - while (iter.nextCodepoint()) |codepoint| { - if (!isIdentifierContinue(@intCast(CodePoint, codepoint))) { + iter.nextCodepointNoReturn(); + + while (iter.c > -1) : (iter.nextCodepointNoReturn()) { + if (!isIdentifierContinue(iter.c)) { return false; } } |