diff options
author | 2023-05-06 23:36:37 -0700 | |
---|---|---|
committer | 2023-05-06 23:36:37 -0700 | |
commit | 25eecc3e07531fd18a42fa809dc22050987449a5 (patch) | |
tree | d71ecb2a7b05b0f13be78ae20c9f8ec3f7fa1730 /src | |
parent | 0e1b934e2720ba2449387bf589f0c2d0392cceef (diff) | |
download | bun-25eecc3e07531fd18a42fa809dc22050987449a5.tar.gz bun-25eecc3e07531fd18a42fa809dc22050987449a5.tar.zst bun-25eecc3e07531fd18a42fa809dc22050987449a5.zip |
Add latin1 identifier fast path
Diffstat (limited to 'src')
-rw-r--r-- | src/js_lexer.zig | 81 |
1 file changed, 73 insertions, 8 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 4aaf55f0f..9218e3f53 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -697,11 +697,11 @@ fn NewLexer_( if (lexer.code_point == '{') { suffix_len = 2; lexer.step(); - if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_middle; - } else { - lexer.token = T.t_template_head; - } + lexer.token = if (lexer.rescan_close_brace_as_template_token) + T.t_template_middle + else + T.t_template_head; + break :stringLiteral; } continue :stringLiteral; @@ -1698,16 +1698,23 @@ fn NewLexer_( }, '_', '$', 'a'...'z', 'A'...'Z' => { + const advance = latin1IdentifierContinueLength(lexer.source.contents[lexer.current..]); + + lexer.end = lexer.current + advance; + lexer.current = lexer.end; + lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { - lexer.step(); + + if (lexer.code_point >= 0x80) { + while (isIdentifierContinue(lexer.code_point)) { + lexer.step(); + } } if (lexer.code_point != '\\') { // this code is so hot that if you save lexer.raw() into a temporary variable // it shows up in profiling lexer.identifier = lexer.raw(); - // switching to strings.ExactSizeMatcher doesn't seem to have an impact here lexer.token = Keywords.get(lexer.identifier) orelse T.t_identifier; } else { const scan_result = try lexer.scanIdentifierWithEscapes(.normal); @@ -3179,6 +3186,64 @@ pub fn isLatin1Identifier(comptime Buffer: type, name: Buffer) bool { return true; } +fn latin1IdentifierContinueLength(name: []const u8) usize { + var remaining = name; + const wrap_len = 16; + const len_wrapped: usize = if (comptime Environment.enableSIMD) remaining.len - (remaining.len % wrap_len) else 0; + var wrapped = name[0..len_wrapped]; + remaining = name[wrapped.len..]; + + if (comptime Environment.enableSIMD) { + // This is not meaningfully faster on aarch64. 
+ // Earlier attempt: https://zig.godbolt.org/z/j5G8M9ooG + // Later: https://zig.godbolt.org/z/7Yzh7df9v + const Vec = @Vector(wrap_len, u8); + + while (wrapped.len > 0) : (wrapped = wrapped[wrap_len..]) { + var other: [wrap_len]u8 = undefined; + const vec: [wrap_len]u8 = wrapped[0..wrap_len].*; + for (vec, &other) |c, *dest| { + dest.* = switch (c) { + '0'...'9', + 'a'...'z', + 'A'...'Z', + '$', + '_', + => 0, + else => 1, + }; + } + + if (std.simd.firstIndexOfValue(@bitCast(Vec, other), 1)) |first| { + if (comptime Environment.allow_assert) { + for (vec[0..first]) |c| { + std.debug.assert(isIdentifierContinue(c)); + } + + std.debug.assert(!isIdentifierContinue(vec[first])); + } + + return @as(usize, first) + + @ptrToInt(wrapped.ptr) - @ptrToInt(name.ptr); + } + } + } + + for (remaining, 0..) |c, len| { + switch (c) { + '0'...'9', + 'a'...'z', + 'A'...'Z', + '$', + '_', + => {}, + else => return len + len_wrapped, + } + } + + return name.len; +} + pub const PragmaArg = enum { no_space_first, skip_space_first, |