author    Jarred Sumner <jarred@jarredsumner.com> 2022-03-07 00:30:51 -0800
committer Jarred Sumner <jarred@jarredsumner.com> 2022-03-07 00:30:51 -0800
commit 5ee063f6b22e00f1d0cd98e611063c2b9407a31a (patch)
tree   e8f8bf9273d9cf0308b7e737d253d2c3d3799e82 /src
parent 50c8747c9769663b0d9485cc3dfa74f92fd7ffc3 (diff)
[JS/JSON] Optimize parsing long strings
Diffstat (limited to 'src')
-rw-r--r-- src/js_lexer.zig | 38
1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 8beb9ccd1..d6c1977f0 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -700,8 +700,18 @@ fn NewLexer_(
needs_slow_path = true;
} else if (is_json and lexer.code_point < 0x20) {
try lexer.syntaxError();
+ } else if (comptime quote == '"' or quote == '\'') {
+ // this is only faster at the 800 KB or so mark
+ // so, pretty good for source maps
+ // but probably not a lot else
+ const remainder = lexer.source.contents[lexer.current..];
+ if (remainder.len > 16_000) {
+ lexer.current += indexOfInterestingCharacterInStringLiteral(remainder, quote) orelse remainder.len;
+ lexer.end = lexer.current -| 1;
+ lexer.step();
+ continue;
+ }
}
- // this is only faster at the 800 KB or so mark
// which is kind of nonsensical for real usage?
// fast path: if you feed bun a string that is greater than around 800 KB
// it becomes worthwhile to do a vectorized search
@@ -3015,6 +3025,32 @@ pub fn isLatin1Identifier(comptime Buffer: type, name: Buffer) bool {
return true;
}
+fn indexOfInterestingCharacterInStringLiteral(text_: []const u8, quote: u8) ?usize {
+ var text = text_;
+ const quote_ = @splat(strings.ascii_vector_size, @as(u8, quote));
+ const backslash = @splat(strings.ascii_vector_size, @as(u8, '\\'));
+ const V1x16 = strings.AsciiVectorU1;
+
+ while (strings.ascii_vector_size < text.len) {
+ const vec: strings.AsciiVector = text[0..strings.ascii_vector_size].*;
+
+ const any_significant =
+ @bitCast(V1x16, vec > strings.max_16_ascii) |
+ @bitCast(V1x16, vec < strings.min_16_ascii) |
+ @bitCast(V1x16, quote_ == vec) |
+ @bitCast(V1x16, backslash == vec);
+
+ const bitmask = @ptrCast(*const u16, &any_significant).*;
+ const first = @ctz(u16, bitmask);
+
+ if (first < strings.ascii_vector_size) {
+ return first + (text_.len - text.len);
+ }
+ text = text[strings.ascii_vector_size..];
+ }
+
+ return null;
+}
test "isIdentifier" {
const expect = std.testing.expect;
try expect(!isIdentifierStart(0x2029));
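
For reference, the helper added in the second hunk treats a byte as "interesting" when it is the closing quote, a backslash, or anything outside printable ASCII, i.e. exactly the cases that need the lexer's escape/UTF-8 slow path; every other byte can be skipped in bulk. A minimal scalar sketch of that predicate is below, written in the Zig 0.9-era syntax this commit targets. firstInterestingByteScalar is a hypothetical name, not part of the patch, and the 0x20/127 bounds are an assumption about what strings.min_16_ascii and strings.max_16_ascii splat to.

const std = @import("std");

// Hypothetical scalar equivalent of indexOfInterestingCharacterInStringLiteral:
// returns the index of the first byte that forces the slow path, or null if the
// whole slice is plain printable ASCII with no quote or backslash.
fn firstInterestingByteScalar(text: []const u8, quote: u8) ?usize {
    for (text) |c, i| {
        if (c == quote or c == '\\' or c < 0x20 or c > 127) return i;
    }
    return null;
}

test "firstInterestingByteScalar" {
    const expect = std.testing.expect;
    try expect(firstInterestingByteScalar("plain ascii text", '"') == null);
    try expect(firstInterestingByteScalar("pre\\post", '"').? == 3);
    try expect(firstInterestingByteScalar("done\"", '"').? == 4);
}

The vectorized version in the patch performs the same comparisons 16 bytes at a time and uses the resulting bitmask plus @ctz to locate the first match, while the remainder.len > 16_000 guard keeps the SIMD setup cost off typical short string literals; per the commit's own comments, the win only shows up for very large inputs such as source maps.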