aboutsummaryrefslogtreecommitdiff
path: root/src/js_lexer.zig
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/js_lexer.zig99
1 files changed, 68 insertions, 31 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 687d20409..2cfc0f732 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -29,6 +29,7 @@ pub var emptyJavaScriptString = ([_]u16{0});
pub const JSONOptions = struct {
allow_comments: bool = false,
allow_trailing_commas: bool = false,
+ starts_with_string: bool = false,
};
pub const Lexer = struct {
@@ -524,7 +525,7 @@ pub const Lexer = struct {
pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool };
fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral {
var needs_slow_path = false;
- var suffix_len: u3 = 1;
+ var suffix_len: u3 = if (comptime quote == 0) 0 else 1;
stringLiteral: while (true) {
@setRuntimeSafety(false);
@@ -533,7 +534,7 @@ pub const Lexer = struct {
try lexer.step();
// Handle Windows CRLF
- if (lexer.code_point == '\r' and lexer.json_options != null) {
+ if (lexer.code_point == '\r' and lexer.json_options != null) {
try lexer.step();
if (lexer.code_point == '\n') {
try lexer.step();
@@ -541,17 +542,24 @@ pub const Lexer = struct {
continue :stringLiteral;
}
- // Skip slow path for a handful of common escaped characters that don't need UTf16 handling
- needs_slow_path = switch (lexer.code_point) {
- // if it was previously marked as needing slow path, then keep it
- 'n', '`', '\'', '0', '"' => needs_slow_path,
- else => true,
- };
+ switch (lexer.code_point) {
+ 't', 'r', 'n', '`', '\'', '0', '"', 0x2028, 0x2029 => {
+ try lexer.step();
+ continue :stringLiteral;
+ },
+ else => {
+ needs_slow_path = true;
+ },
+ }
},
// This indicates the end of the file
-1 => {
- try lexer.addDefaultError("Unterminated string literal");
+ if (comptime quote != 0) {
+ try lexer.addDefaultError("Unterminated string literal");
+ }
+
+ break :stringLiteral;
},
'\r' => {
@@ -564,8 +572,17 @@ pub const Lexer = struct {
},
'\n' => {
- if (comptime quote != '`') {
- try lexer.addDefaultError("Unterminated string literal");
+
+ // Implicitly-quoted strings end when they reach a newline OR end of file
+ // This only applies to .env
+ switch (comptime quote) {
+ 0 => {
+ break :stringLiteral;
+ },
+ '`' => {},
+ else => {
+ try lexer.addDefaultError("Unterminated string literal");
+ },
}
},
@@ -608,27 +625,23 @@ pub const Lexer = struct {
return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len };
}
- fn parseStringLiteral(lexer: *LexerType) !void {
- var quote: CodePoint = lexer.code_point;
-
- if (quote != '`') {
+ pub fn parseStringLiteral(lexer: *LexerType, comptime quote: CodePoint) !void {
+ if (comptime quote != '`') {
lexer.token = T.t_string_literal;
} else if (lexer.rescan_close_brace_as_template_token) {
lexer.token = T.t_template_tail;
} else {
lexer.token = T.t_no_substitution_template_literal;
}
+ // quote is 0 when parsing JSON from .env
+ // .env values may not always be quoted.
try lexer.step();
- var string_literal_details = switch (quote) {
- '`' => try lexer.parseStringLiteralInnter('`'),
- '\'' => try lexer.parseStringLiteralInnter('\''),
- '"' => try lexer.parseStringLiteralInnter('"'),
- else => unreachable,
- };
+ var string_literal_details = try lexer.parseStringLiteralInnter(quote);
// Reset string literal
- lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - string_literal_details.suffix_len];
+ const base = if (comptime quote == 0) lexer.start else lexer.start + 1;
+ lexer.string_literal_slice = lexer.source.contents[base .. lexer.end - string_literal_details.suffix_len];
lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path;
lexer.string_literal_buffer.shrinkRetainingCapacity(0);
if (string_literal_details.needs_slow_path) {
@@ -1499,8 +1512,14 @@ pub const Lexer = struct {
}
},
- '\'', '"', '`' => {
- try lexer.parseStringLiteral();
+ '\'' => {
+ try lexer.parseStringLiteral('\'');
+ },
+ '"' => {
+ try lexer.parseStringLiteral('"');
+ },
+ '`' => {
+ try lexer.parseStringLiteral('`');
},
'_', '$', 'a'...'z', 'A'...'Z' => {
@@ -1778,7 +1797,6 @@ pub const Lexer = struct {
return utf16ToString(lexer, js);
}
- // TODO: use wtf-8 encoding.
pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) string {
var temp: [4]u8 = undefined;
var list = std.ArrayList(u8).initCapacity(lexer.allocator, js.len) catch unreachable;
@@ -2595,16 +2613,32 @@ pub const Lexer = struct {
pub fn isIdentifierStart(codepoint: CodePoint) bool {
@setRuntimeSafety(false);
+ switch (codepoint) {
+ 'a'...'z', 'A'...'Z', '_', '$' => return true,
+ else => {},
+ }
+
+ if (codepoint < 127) return false;
+
return switch (codepoint) {
- 'a'...'z', 'A'...'Z', '_', '$' => true,
+ std.math.max(tables.id_start.r16_min, tables.id_start.latin_offset)...tables.id_start.r16_max => tables.id_start.inRange16(@intCast(u16, codepoint)),
+ tables.id_start.r32_min...tables.id_start.r32_max => tables.id_start.inRange32(@intCast(u32, codepoint)),
else => false,
};
}
pub fn isIdentifierContinue(codepoint: CodePoint) bool {
@setRuntimeSafety(false);
+ switch (codepoint) {
+ 'a'...'z', 'A'...'Z', '_', '$', '0'...'9' => return true,
+ else => {},
+ }
+
+ if (codepoint < 127) return false;
+
return switch (codepoint) {
- 'a'...'z', 'A'...'Z', '_', '$', '0'...'9', 0x200C, 0x200D => true,
+ std.math.max(tables.id_continue.r16_min, tables.id_continue.latin_offset)...tables.id_continue.r16_max => tables.id_continue.inRange16(@intCast(u16, codepoint)),
+ tables.id_continue.r32_min...tables.id_continue.r32_max => tables.id_continue.inRange32(@intCast(u32, codepoint)),
else => false,
};
}
@@ -2643,13 +2677,16 @@ pub fn isIdentifier(text: string) bool {
return false;
}
- var iter = std.unicode.Utf8Iterator{ .bytes = text, .i = 0 };
- if (!isIdentifierStart(iter.nextCodepoint() orelse unreachable)) {
+ var iter = strings.CodepointIterator{ .i = 0, .bytes = text };
+
+ if (!isIdentifierStart(iter.nextCodepoint())) {
return false;
}
- while (iter.nextCodepoint()) |codepoint| {
- if (!isIdentifierContinue(@intCast(CodePoint, codepoint))) {
+ iter.nextCodepointNoReturn();
+
+ while (iter.c > -1) : (iter.nextCodepointNoReturn()) {
+ if (!isIdentifierContinue(iter.c)) {
return false;
}
}