aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-05-06 23:36:37 -0700
committerGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-05-06 23:36:37 -0700
commit25eecc3e07531fd18a42fa809dc22050987449a5 (patch)
treed71ecb2a7b05b0f13be78ae20c9f8ec3f7fa1730 /src
parent0e1b934e2720ba2449387bf589f0c2d0392cceef (diff)
downloadbun-25eecc3e07531fd18a42fa809dc22050987449a5.tar.gz
bun-25eecc3e07531fd18a42fa809dc22050987449a5.tar.zst
bun-25eecc3e07531fd18a42fa809dc22050987449a5.zip
Add latin1 identifier fast path
Diffstat (limited to 'src')
-rw-r--r--src/js_lexer.zig81
1 files changed, 73 insertions, 8 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 4aaf55f0f..9218e3f53 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -697,11 +697,11 @@ fn NewLexer_(
if (lexer.code_point == '{') {
suffix_len = 2;
lexer.step();
- if (lexer.rescan_close_brace_as_template_token) {
- lexer.token = T.t_template_middle;
- } else {
- lexer.token = T.t_template_head;
- }
+ lexer.token = if (lexer.rescan_close_brace_as_template_token)
+ T.t_template_middle
+ else
+ T.t_template_head;
+
break :stringLiteral;
}
continue :stringLiteral;
@@ -1698,16 +1698,23 @@ fn NewLexer_(
},
'_', '$', 'a'...'z', 'A'...'Z' => {
+ const advance = latin1IdentifierContinueLength(lexer.source.contents[lexer.current..]);
+
+ lexer.end = lexer.current + advance;
+ lexer.current = lexer.end;
+
lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
- lexer.step();
+
+ if (lexer.code_point >= 0x80) {
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
+ }
}
if (lexer.code_point != '\\') {
// this code is so hot that if you save lexer.raw() into a temporary variable
// it shows up in profiling
lexer.identifier = lexer.raw();
- // switching to strings.ExactSizeMatcher doesn't seem to have an impact here
lexer.token = Keywords.get(lexer.identifier) orelse T.t_identifier;
} else {
const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
@@ -3179,6 +3186,64 @@ pub fn isLatin1Identifier(comptime Buffer: type, name: Buffer) bool {
return true;
}
+fn latin1IdentifierContinueLength(name: []const u8) usize {
+ var remaining = name;
+ const wrap_len = 16;
+ const len_wrapped: usize = if (comptime Environment.enableSIMD) remaining.len - (remaining.len % wrap_len) else 0;
+ var wrapped = name[0..len_wrapped];
+ remaining = name[wrapped.len..];
+
+ if (comptime Environment.enableSIMD) {
+ // This is not meaningfully faster on aarch64.
+ // Earlier attempt: https://zig.godbolt.org/z/j5G8M9ooG
+ // Later: https://zig.godbolt.org/z/7Yzh7df9v
+ const Vec = @Vector(wrap_len, u8);
+
+ while (wrapped.len > 0) : (wrapped = wrapped[wrap_len..]) {
+ var other: [wrap_len]u8 = undefined;
+ const vec: [wrap_len]u8 = wrapped[0..wrap_len].*;
+ for (vec, &other) |c, *dest| {
+ dest.* = switch (c) {
+ '0'...'9',
+ 'a'...'z',
+ 'A'...'Z',
+ '$',
+ '_',
+ => 0,
+ else => 1,
+ };
+ }
+
+ if (std.simd.firstIndexOfValue(@bitCast(Vec, other), 1)) |first| {
+ if (comptime Environment.allow_assert) {
+ for (vec[0..first]) |c| {
+ std.debug.assert(isIdentifierContinue(c));
+ }
+
+ std.debug.assert(!isIdentifierContinue(vec[first]));
+ }
+
+ return @as(usize, first) +
+ @ptrToInt(wrapped.ptr) - @ptrToInt(name.ptr);
+ }
+ }
+ }
+
+ for (remaining, 0..) |c, len| {
+ switch (c) {
+ '0'...'9',
+ 'a'...'z',
+ 'A'...'Z',
+ '$',
+ '_',
+ => {},
+ else => return len + len_wrapped,
+ }
+ }
+
+ return name.len;
+}
+
pub const PragmaArg = enum {
no_space_first,
skip_space_first,