[JS/JSON] Optimize parsing long strings

author: Jarred Sumner <jarred@jarredsumner.com> 2022-03-07 00:30:51 -0800
committer: Jarred Sumner <jarred@jarredsumner.com> 2022-03-07 00:30:51 -0800
commit: 5ee063f6b22e00f1d0cd98e611063c2b9407a31a (patch)
tree: e8f8bf9273d9cf0308b7e737d253d2c3d3799e82 /src
parent: 50c8747c9769663b0d9485cc3dfa74f92fd7ffc3 (diff)
download: bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.tar.gz
bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.tar.zst
bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.zip
1 files changed, 37 insertions, 1 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 8beb9ccd1..d6c1977f0 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -700,8 +700,18 @@ fn NewLexer_(
                             needs_slow_path = true;
                         } else if (is_json and lexer.code_point < 0x20) {
                             try lexer.syntaxError();
+                        } else if (comptime quote == '"' or quote == '\'') {
+                            // this is only faster at the 800 KB or so mark
+                            // so, pretty good for source maps
+                            // but probably not a lot else
+                            const remainder = lexer.source.contents[lexer.current..];
+                            if (remainder.len > 16_000) {
+                                lexer.current += indexOfInterestingCharacterInStringLiteral(remainder, quote) orelse remainder.len;
+                                lexer.end = lexer.current -| 1;
+                                lexer.step();
+                                continue;
+                            }
                         }
-                        // this is only faster at the 800 KB or so mark
                         // which is kind of nonsensical for real usage?
                         // fast path: if you feed bun a string that is greater than around 800 KB
                         // it becomes worthwhile to do a vectorized search
@@ -3015,6 +3025,32 @@ pub fn isLatin1Identifier(comptime Buffer: type, name: Buffer) bool {
     return true;
 }
 
+fn indexOfInterestingCharacterInStringLiteral(text_: []const u8, quote: u8) ?usize {
+    var text = text_;
+    const quote_ = @splat(strings.ascii_vector_size, @as(u8, quote));
+    const backslash = @splat(strings.ascii_vector_size, @as(u8, '\\'));
+    const V1x16 = strings.AsciiVectorU1;
+
+    while (strings.ascii_vector_size < text.len) {
+        const vec: strings.AsciiVector = text[0..strings.ascii_vector_size].*;
+
+        const any_significant =
+            @bitCast(V1x16, vec > strings.max_16_ascii) |
+            @bitCast(V1x16, vec < strings.min_16_ascii) |
+            @bitCast(V1x16, quote_ == vec) |
+            @bitCast(V1x16, backslash == vec);
+
+        const bitmask = @ptrCast(*const u16, &any_significant).*;
+        const first = @ctz(u16, bitmask);
+
+        if (first < strings.ascii_vector_size) {
+            return first + (text_.len - text.len);
+        }
+        text = text[strings.ascii_vector_size..];
+    }
+
+    return null;
+}
 test "isIdentifier" {
     const expect = std.testing.expect;
     try expect(!isIdentifierStart(0x2029));
author	Jarred Sumner <jarred@jarredsumner.com>	2022-03-07 00:30:51 -0800
committer	Jarred Sumner <jarred@jarredsumner.com>	2022-03-07 00:30:51 -0800
commit	5ee063f6b22e00f1d0cd98e611063c2b9407a31a (patch)
tree	e8f8bf9273d9cf0308b7e737d253d2c3d3799e82 /src
parent	50c8747c9769663b0d9485cc3dfa74f92fd7ffc3 (diff)
download	bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.tar.gz bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.tar.zst bun-5ee063f6b22e00f1d0cd98e611063c2b9407a31a.zip