aboutsummaryrefslogtreecommitdiff
path: root/src/js_lexer.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/js_lexer.zig')
-rw-r--r--src/js_lexer.zig116
1 files changed, 52 insertions, 64 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index fef12bf47..bc9ba69fd 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -20,6 +20,17 @@ pub const PropertyModifierKeyword = tables.PropertyModifierKeyword;
pub const TypescriptStmtKeyword = tables.TypescriptStmtKeyword;
pub const TypeScriptAccessibilityModifier = tables.TypeScriptAccessibilityModifier;
+fn utf8ByteSequenceLength(first_byte: u8) u3 {
+ // Maps a UTF-8 lead byte to its sequence length (1-4); yields 0 for bytes matching no lead pattern (0x80-0xBF, 0xF8-0xFF). The switch is optimized much better than a "smart" approach using @clz
+ return switch (first_byte) {
+ 0b0000_0000...0b0111_1111 => 1,
+ 0b1100_0000...0b1101_1111 => 2,
+ 0b1110_0000...0b1110_1111 => 3,
+ 0b1111_0000...0b1111_0111 => 4,
+ else => 0,
+ };
+}
+
fn notimpl() noreturn {
Global.panic("not implemented yet!", .{});
}
@@ -123,22 +134,22 @@ pub const Lexer = struct {
return logger.usize2Loc(self.start);
}
- inline fn nextCodepointSlice(it: *LexerType) !?[]const u8 {
+ inline fn nextCodepointSlice(it: *LexerType) []const u8 {
@setRuntimeSafety(false);
- if (it.current >= it.source.contents.len) {
- // without this line, strings cut off one before the last characte
- it.end = it.current;
- @setRuntimeSafety(false);
+ // if (it.current >= it.source.contents.len) {
+ // // without this line, strings cut off one before the last character
+ // it.end = it.current;
+ // @setRuntimeSafety(false);
- return null;
- }
+ // return null;
+ // }
- const cp_len = unicode.utf8ByteSequenceLength(it.source.contents[it.current]) catch return Error.UTF8Fail;
+ const cp_len = utf8ByteSequenceLength(it.source.contents[it.current]);
it.end = it.current;
it.current += cp_len;
- return it.source.contents[it.current - cp_len .. it.current];
+ return if (!(it.current > it.source.contents.len)) it.source.contents[it.current - cp_len .. it.current] else "";
}
pub fn syntaxError(self: *LexerType) !void {
@@ -192,27 +203,29 @@ pub const Lexer = struct {
}
inline fn nextCodepoint(it: *LexerType) !CodePoint {
- const slice = (try it.nextCodepointSlice()) orelse return @as(CodePoint, -1);
-
- switch (slice.len) {
- 1 => return @as(CodePoint, slice[0]),
- 2 => return @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
- 3 => return @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
- 4 => return @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
+ const slice = it.nextCodepointSlice();
+
+ return switch (slice.len) {
+ 0 => -1,
+ 1 => @as(CodePoint, slice[0]),
+ 2 => @as(CodePoint, unicode.utf8Decode2(slice) catch unreachable),
+ 3 => @as(CodePoint, unicode.utf8Decode3(slice) catch unreachable),
+ 4 => @as(CodePoint, unicode.utf8Decode4(slice) catch unreachable),
else => unreachable,
- }
+ };
}
/// Look ahead at the next n codepoints without advancing the iterator.
/// If fewer than n codepoints are available, then return the remainder of the string.
- fn peek(it: *LexerType, n: usize) !string {
+ fn peek(it: *LexerType, n: usize) string {
const original_i = it.current;
defer it.current = original_i;
var end_ix = original_i;
var found: usize = 0;
while (found < n) : (found += 1) {
- const next_codepoint = (try it.nextCodepointSlice()) orelse return it.source.contents[original_i..];
+ const next_codepoint = it.nextCodepointSlice();
+ if (next_codepoint.len == 0) break;
end_ix += next_codepoint.len;
}
@@ -963,8 +976,7 @@ pub const Lexer = struct {
if (lexer.code_point == '\\') {
@setRuntimeSafety(false);
- const scan_result = try lexer.scanIdentifierWithEscapes(.private);
- lexer.identifier = scan_result.contents;
+ lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
lexer.token = T.t_private_identifier;
} else {
@setRuntimeSafety(false);
@@ -978,8 +990,7 @@ pub const Lexer = struct {
try lexer.step();
}
if (lexer.code_point == '\\') {
- const scan_result = try lexer.scanIdentifierWithEscapes(.private);
- lexer.identifier = scan_result.contents;
+ lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
lexer.token = T.t_private_identifier;
} else {
lexer.token = T.t_private_identifier;
@@ -1379,7 +1390,7 @@ pub const Lexer = struct {
},
// Handle legacy HTML-style comments
'!' => {
- if (strings.eqlComptime(try lexer.peek("--".len), "--")) {
+ if (strings.eqlComptime(lexer.peek("--".len), "--")) {
try lexer.addUnsupportedSyntaxError("Legacy HTML comments not implemented yet!");
return;
}
@@ -1470,9 +1481,10 @@ pub const Lexer = struct {
lexer.identifier = scan_result.contents;
lexer.token = scan_result.token;
} else {
- const contents = lexer.raw();
- lexer.identifier = contents;
- lexer.token = Keywords.get(contents) orelse T.t_identifier;
+ // this code is so hot that if you save lexer.raw() into a temporary variable
+ // it shows up in profiling
+ lexer.identifier = lexer.raw();
+ lexer.token = Keywords.get(lexer.identifier) orelse T.t_identifier;
}
},
@@ -2534,43 +2546,23 @@ pub const Lexer = struct {
};
pub fn isIdentifierStart(codepoint: CodePoint) bool {
- switch (codepoint) {
- 'a'...'z', 'A'...'Z', '_', '$' => {
- return true;
- },
- else => {
- return false;
- },
- }
+ @setRuntimeSafety(false);
+ return switch (codepoint) {
+ 'a'...'z', 'A'...'Z', '_', '$' => true,
+ else => false,
+ };
}
pub fn isIdentifierContinue(codepoint: CodePoint) bool {
@setRuntimeSafety(false);
- switch (codepoint) {
- '_', '$', '0'...'9', 'a'...'z', 'A'...'Z' => {
- return true;
- },
- -1 => {
- return false;
- },
- else => {},
- }
-
- // All ASCII identifier start code points are listed above
- if (codepoint < 0x7F) {
- return false;
- }
-
- // ZWNJ and ZWJ are allowed in identifiers
- if (codepoint == 0x200C or codepoint == 0x200D) {
- return true;
- }
-
- return false;
+ return switch (codepoint) {
+ 'a'...'z', 'A'...'Z', '_', '$', '0'...'9', 0x200C, 0x200D => true,
+ else => false,
+ };
}
pub fn isWhitespace(codepoint: CodePoint) bool {
- switch (codepoint) {
+ return switch (codepoint) {
0x000B, // line tabulation
0x0009, // character tabulation
0x000C, // form feed
@@ -2593,13 +2585,9 @@ pub fn isWhitespace(codepoint: CodePoint) bool {
0x205F, // medium mathematical space
0x3000, // ideographic space
0xFEFF, // zero width non-breaking space
- => {
- return true;
- },
- else => {
- return false;
- },
- }
+ => true,
+ else => false,
+ };
}
pub fn isIdentifier(text: string) bool {