Diffstat:
 src/js_lexer.zig            | 169 ++++++++++++++++++++++++++++++++----------
 src/js_parser/js_parser.zig |   4 +-
 2 files changed, 142 insertions(+), 31 deletions(-)
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index a55053bca..4febdfdfa 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -265,7 +265,7 @@ fn NewLexer_(
             defer buf_.* = buf;
             if (comptime is_json) lexer.is_ascii_only = false;

-            const iterator = strings.CodepointIterator{ .bytes = text[start..], .i = 0 };
+            const iterator = strings.CodepointIterator{ .bytes = text, .i = 0 };
             var iter = strings.CodepointIterator.Cursor{};
             while (iterator.next(&iter)) {
                 const width = iter.width;
@@ -511,6 +511,7 @@ fn NewLexer_(
                     .{},
                     true,
                 );
+                return;
             }
@@ -602,6 +603,7 @@ fn NewLexer_(
         }

         pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool };
+
         fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral {
             var needs_slow_path = false;
             var suffix_len: u3 = if (comptime quote == 0) 0 else 1;
@@ -631,6 +633,7 @@ fn NewLexer_(
                             // 0 cannot be in this list because it may be a legacy octal literal
                             'v', 'f', 't', 'r', 'n', '`', '\'', '"', 0x2028, 0x2029 => {
                                 lexer.step();
+                                continue :stringLiteral;
                             },

                             else => {
@@ -703,6 +706,40 @@ fn NewLexer_(
                     } else if (is_json and lexer.code_point < 0x20) {
                         try lexer.syntaxError();
                     }
+                    // this is only faster at the 800 KB or so mark
+                    // which is kind of nonsensical for real usage?
+                    // fast path: if you feed bun a string that is greater than around 800 KB
+                    // it becomes worthwhile to do a vectorized search
+                    // if (comptime big) {
+                    //     if (comptime quote == '"' or quote == '\'') {
+                    //         while (lexer.current + 16 < lexer.source.contents.len) {
+                    //             const quote_ = @splat(16, @as(u8, quote));
+                    //             const backslash = @splat(16, @as(u8, '\\'));
+                    //             const V1x16 = @Vector(16, u1);
+
+                    //             {
+                    //                 const vec: strings.AsciiVector = lexer.source.contents[lexer.current..][0..strings.ascii_vector_size].*;
+
+                    //                 const any_significant =
+                    //                     @bitCast(V1x16, vec > strings.max_16_ascii) |
+                    //                     @bitCast(V1x16, quote_ == vec) |
+                    //                     @bitCast(V1x16, backslash == vec);
+
+                    //                 // vec > strings.max_16_ascii);
+                    //                 const bitmask = @ptrCast(*const u16, &any_significant).*;
+                    //                 const first = @ctz(u16, bitmask);
+
+                    //                 if (first < 16) {
+                    //                     lexer.end = lexer.current + @maximum(first, 1) - 1;
+                    //                     lexer.current = lexer.current + first;
+                    //                     lexer.step();
+                    //                     continue :stringLiteral;
+                    //                 }
+                    //                 lexer.current += 16;
+                    //             }
+                    //         }
+                    //     }
+                    // }
                 },
             }
             lexer.step();
@@ -723,7 +760,7 @@ fn NewLexer_(
             // .env values may not always be quoted.
             lexer.step();

-            var string_literal_details = try lexer.parseStringLiteralInnter(quote);
+            const string_literal_details = try lexer.parseStringLiteralInnter(quote);

             // Reset string literal
             const base = if (comptime quote == 0) lexer.start else lexer.start + 1;
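The commented-out block in the string-literal scanner above sketches a vectorized fast path: scan 16 bytes at a time and stop at the first byte that needs per-character handling, i.e. the closing quote, a backslash, or a non-ASCII byte. Below is a minimal standalone version of that idea in the same older-Zig dialect the diff uses, assuming `strings.max_16_ascii` is a 16-lane splat of 0x7F (the name suggests this; its definition is not part of the diff). `firstSignificantByte` is an illustrative name, not the commit's code:

```zig
const V1x16 = @Vector(16, u1);
const max_16_ascii = @splat(16, @as(u8, 0x7F));

// Returns the lane index of the first byte in `chunk` that needs
// per-character handling, or 16 if the whole chunk is plain ASCII
// string content that can be skipped wholesale.
fn firstSignificantByte(chunk: @Vector(16, u8), comptime quote: u8) u16 {
    const quote_ = @splat(16, @as(u8, quote));
    const backslash = @splat(16, @as(u8, '\\'));

    // Lane-wise compares produce boolean vectors; OR their u1 forms.
    const any_significant =
        @bitCast(V1x16, chunk > max_16_ascii) |
        @bitCast(V1x16, quote_ == chunk) |
        @bitCast(V1x16, backslash == chunk);

    // Reinterpret the 16 lanes as a bitmask and find the lowest set bit.
    const bitmask = @ptrCast(*const u16, &any_significant).*;
    return @ctz(u16, bitmask);
}
```

Per the comment in the diff, the setup cost only pays off at roughly 800 KB of input, which is why the block ships disabled.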
@@ -839,7 +876,7 @@ fn NewLexer_(
         // This is an edge case that doesn't really exist in the wild, so it doesn't
         // need to be as fast as possible.
-        pub fn scanIdentifierWithEscapes(lexer: *LexerType, comptime kind: IdentifierKind) anyerror!ScanResult {
+        pub fn scanIdentifierWithEscapes(lexer: *LexerType, kind: IdentifierKind) anyerror!ScanResult {
             var result = ScanResult{ .token = .t_end_of_file, .contents = "" };

             // First pass: scan over the identifier to see how long it is
             while (true) {
@@ -860,9 +897,7 @@ fn NewLexer_(
                         '0'...'9', 'a'...'f', 'A'...'F' => {
                             lexer.step();
                         },
-                        else => {
-                            try lexer.syntaxError();
-                        },
+                        else => try lexer.syntaxError(),
                     }
                 }
@@ -875,33 +910,25 @@ fn NewLexer_(
                         '0'...'9', 'a'...'f', 'A'...'F' => {
                             lexer.step();
                         },
-                        else => {
-                            try lexer.syntaxError();
-                        },
+                        else => try lexer.syntaxError(),
                     }
                     switch (lexer.code_point) {
                         '0'...'9', 'a'...'f', 'A'...'F' => {
                             lexer.step();
                         },
-                        else => {
-                            try lexer.syntaxError();
-                        },
+                        else => try lexer.syntaxError(),
                     }
                     switch (lexer.code_point) {
                         '0'...'9', 'a'...'f', 'A'...'F' => {
                             lexer.step();
                         },
-                        else => {
-                            try lexer.syntaxError();
-                        },
+                        else => try lexer.syntaxError(),
                     }
                     switch (lexer.code_point) {
                         '0'...'9', 'a'...'f', 'A'...'F' => {
                             lexer.step();
                         },
-                        else => {
-                            try lexer.syntaxError();
-                        },
+                        else => try lexer.syntaxError(),
                     }
                 }
                 continue;
@@ -914,7 +941,7 @@ fn NewLexer_(
             }

             // Second pass: re-use our existing escape sequence parser
-            var original_text = lexer.raw();
+            const original_text = lexer.raw();
             if (original_text.len < 1024) {
                 var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 };
                 try lexer.decodeEscapeSequences(lexer.start, original_text, FakeArrayList16, &buf);
@@ -930,10 +957,10 @@ fn NewLexer_(
                 result.contents = lexer.utf16ToString(large_escape_sequence_list.items);
             }

-            var identifier = result.contents;
-            if (kind == .private) {
-                identifier = result.contents[1..];
-            }
+            const identifier = if (kind != .private)
+                result.contents
+            else
+                result.contents[1..];

             if (!isIdentifier(identifier)) {
                 try lexer.addRangeError(
@@ -943,7 +970,8 @@ fn NewLexer_(
                     true,
                 );
             }
-            result.contents = identifier;
+
+            result.contents = result.contents;

             // Escaped keywords are not allowed to work as actual keywords, but they are
             // allowed wherever we allow identifiers or keywords. For example:
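The runs of identical `switch` blocks above encode the `\uXXXX` grammar: when no `{` follows `\u`, exactly four hex digits must follow, each consumed with `lexer.step()` as it is validated. The same check as a standalone predicate, for readers who want the shape without the lexer plumbing (`isFourHexDigits` is an illustrative helper, not from the commit):

```zig
// True when `text` begins with exactly the four hex digits that a
// non-braced \uXXXX escape requires.
fn isFourHexDigits(text: []const u8) bool {
    if (text.len < 4) return false;
    for (text[0..4]) |c| {
        switch (c) {
            '0'...'9', 'a'...'f', 'A'...'F' => {},
            else => return false,
        }
    }
    return true;
}
```

The lexer unrolls the loop because it works codepoint by codepoint through `lexer.step()` rather than slicing the source.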
@@ -1081,6 +1109,9 @@ fn NewLexer_(
                 },

                 '#' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Private identifiers are not allowed in JSON");
+                    }
                     if (lexer.start == 0 and lexer.source.contents[1] == '!') {
                         // "#!/usr/bin/env node"
                         lexer.token = .t_hashbang;
@@ -1098,10 +1129,10 @@ fn NewLexer_(
                         }
                         lexer.identifier = lexer.raw();
                     } else {
+                        // "#foo"
                         lexer.step();
                         if (lexer.code_point == '\\') {
                             lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
-                            lexer.token = T.t_private_identifier;
                         } else {
                             if (!isIdentifierStart(lexer.code_point)) {
                                 try lexer.syntaxError();
@@ -1113,13 +1144,12 @@ fn NewLexer_(
                             }

                             if (lexer.code_point == '\\') {
                                 lexer.identifier = (try lexer.scanIdentifierWithEscapes(.private)).contents;
-                                lexer.token = T.t_private_identifier;
                             } else {
-                                lexer.token = T.t_private_identifier;
                                 lexer.identifier = lexer.raw();
                             }
-                            break;
                         }
+                        lexer.token = T.t_private_identifier;
+                        break;
                     }
                 },

                 '\r', '\n', 0x2028, 0x2029 => {
@@ -1164,14 +1194,26 @@ fn NewLexer_(
                     lexer.token = T.t_colon;
                 },

                 ';' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Semicolons are not allowed in JSON");
+                    }
+
                     lexer.step();
                     lexer.token = T.t_semicolon;
                 },

                 '@' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Decorators are not allowed in JSON");
+                    }
+
                     lexer.step();
                     lexer.token = T.t_at;
                 },

                 '~' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("~ is not allowed in JSON");
+                    }
+
                     lexer.step();
                     lexer.token = T.t_tilde;
                 },
@@ -1212,6 +1254,10 @@ fn NewLexer_(
                     }
                 },

                 '%' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '%' or '%='
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1227,6 +1273,10 @@ fn NewLexer_(
                 },

                 '&' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '&' or '&=' or '&&' or '&&='
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1255,6 +1305,9 @@ fn NewLexer_(
                 },

                 '|' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }

                     // '|' or '|=' or '||' or '||='
                     lexer.step();
@@ -1283,6 +1336,10 @@ fn NewLexer_(
                 },

                 '^' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '^' or '^='
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1298,6 +1355,10 @@ fn NewLexer_(
                 },

                 '+' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '+' or '+=' or '++'
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1318,6 +1379,10 @@ fn NewLexer_(
                 },

                 '-' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '-' or '-=' or '--'
                     lexer.step();
                     switch (lexer.code_point) {
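Every guard added above has the same shape, and it costs the JavaScript lexer nothing: `NewLexer_` generates a distinct type per configuration, so `if (comptime is_json)` is resolved at instantiation time. A reduced model of the pattern (the names and plain error are illustrative, standing in for the commit's `addUnsupportedSyntaxError`):

```zig
fn NewLexer(comptime is_json: bool) type {
    return struct {
        const Self = @This();

        fn lexTilde(_: *Self) !void {
            if (comptime is_json) {
                // JSON instantiation: this branch replaces the whole body.
                return error.UnsupportedSyntaxInJSON;
            }
            // JS instantiation: the guard above compiles away entirely.
        }
    };
}

const JSONLexer = NewLexer(true); // rejects '~', ';', operators, ...
const JSLexer = NewLexer(false); // no runtime check emitted
```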
@@ -1384,6 +1449,7 @@ fn NewLexer_(
                     }
                 },

                 '/' => {
+                    // '/' or '/=' or '//' or '/* ... */'
                     lexer.step();
@@ -1460,6 +1526,10 @@ fn NewLexer_(
                 },

                 '=' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '=' or '=>' or '==' or '==='
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1489,6 +1559,10 @@ fn NewLexer_(
                 },

                 '<' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '<' or '<<' or '<=' or '<<=' or '<!--'
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1527,6 +1601,10 @@ fn NewLexer_(
                 },

                 '>' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '>' or '>>' or '>>>' or '>=' or '>>=' or '>>>='
                     lexer.step();

                     switch (lexer.code_point) {
@@ -1566,6 +1644,10 @@ fn NewLexer_(
                 },

                 '!' => {
+                    if (comptime is_json) {
+                        return lexer.addUnsupportedSyntaxError("Operators are not allowed in JSON");
+                    }
+
                     // '!' or '!=' or '!=='
                     lexer.step();
                     switch (lexer.code_point) {
@@ -1707,7 +1789,7 @@ fn NewLexer_(
             }
         };

-        try self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
+        try self.addRangeError(self.range(), "Expected {s} but found \"{s}\"", .{ text, found }, true);
     }

     pub fn scanCommentText(lexer: *LexerType) void {
@@ -1807,7 +1889,13 @@ fn NewLexer_(
             }
         }

+        inline fn assertNotJSON(_: *const LexerType) void {
+            if (comptime is_json) @compileError("JSON should not reach this point");
+            if (comptime is_json) unreachable;
+        }
+
         pub fn scanRegExp(lexer: *LexerType) !void {
+            lexer.assertNotJSON();
             lexer.regex_flags_start = null;
             while (true) {
                 switch (lexer.code_point) {
@@ -1887,6 +1975,8 @@ fn NewLexer_(
         }

         pub fn nextInsideJSXElement(lexer: *LexerType) !void {
+            lexer.assertNotJSON();
+
             lexer.has_newline_before = false;

             while (true) {
@@ -2035,6 +2125,8 @@ fn NewLexer_(
             }
         }
         pub fn parseJSXStringLiteral(lexer: *LexerType, comptime quote: u8) !void {
+            lexer.assertNotJSON();
+
             var backslash = logger.Range.None;
             var needs_decode = false;
@@ -2099,6 +2191,8 @@ fn NewLexer_(
         }

         pub fn expectJSXElementChild(lexer: *LexerType, token: T) !void {
+            lexer.assertNotJSON();
+
             if (lexer.token != token) {
                 try lexer.expected(token);
             }
@@ -2107,6 +2201,8 @@ fn NewLexer_(
         }

         pub fn nextJSXElementChild(lexer: *LexerType) !void {
+            lexer.assertNotJSON();
+
             lexer.has_newline_before = false;
             const original_start = lexer.end;
@@ -2173,6 +2269,8 @@ fn NewLexer_(
         threadlocal var jsx_decode_buf: std.ArrayList(u16) = undefined;
         threadlocal var jsx_decode_init = false;
         pub fn fixWhitespaceAndDecodeJSXEntities(lexer: *LexerType, text: string) !JavascriptString {
+            lexer.assertNotJSON();
+
             if (!jsx_decode_init) {
                 jsx_decode_init = true;
                 jsx_decode_buf = std.ArrayList(u16).init(default_allocator);
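`assertNotJSON`, added above and threaded through the JSX and regexp helpers, leans on Zig's lazy analysis: a function is only compiled for the instantiations that reference it, so if the JSON lexer ever reaches a JSX path, the `@compileError` fires at build time rather than misbehaving at runtime. (The second `if (comptime is_json) unreachable;` line is belt-and-suspenders; the `@compileError` already halts compilation.) A reduced model of the mechanism, not the commit's code:

```zig
fn Lexer(comptime is_json: bool) type {
    return struct {
        const Self = @This();

        inline fn assertNotJSON(_: *const Self) void {
            if (comptime is_json) @compileError("JSON should not reach this point");
        }

        pub fn scanRegExp(self: *Self) void {
            // Fine in Lexer(false); referencing this function from
            // Lexer(true) is rejected while compiling, not at runtime.
            self.assertNotJSON();
        }
    };
}
```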
@@ -2232,6 +2330,8 @@ fn NewLexer_(
         }

         inline fn maybeDecodeJSXEntity(lexer: *LexerType, text: string, cursor: *strings.CodepointIterator.Cursor) void {
+            lexer.assertNotJSON();
+
             if (strings.indexOfChar(text[cursor.width + cursor.i ..], ';')) |length| {
                 const end = cursor.width + cursor.i;
                 const entity = text[end .. end + length];
@@ -2266,6 +2366,8 @@ fn NewLexer_(
         }

         pub fn decodeJSXEntities(lexer: *LexerType, text: string, out: *std.ArrayList(u16)) !void {
+            lexer.assertNotJSON();
+
             const iterator = strings.CodepointIterator.init(text);
             var cursor = strings.CodepointIterator.Cursor{};
@@ -2292,6 +2394,8 @@ fn NewLexer_(
             }
         }
         pub fn expectInsideJSXElement(lexer: *LexerType, token: T) !void {
+            lexer.assertNotJSON();
+
             if (lexer.token != token) {
                 try lexer.expected(token);
             }
@@ -2300,6 +2404,8 @@ fn NewLexer_(
         }

         fn scanRegExpValidateAndStep(lexer: *LexerType) !void {
+            lexer.assertNotJSON();
+
             if (lexer.code_point == '\\') {
                 lexer.step();
             }
@@ -2319,6 +2425,8 @@ fn NewLexer_(
         }

         pub fn rescanCloseBraceAsTemplateToken(lexer: *LexerType) !void {
+            lexer.assertNotJSON();
+
             if (lexer.token != .t_close_brace) {
                 try lexer.expected(.t_close_brace);
             }
@@ -2332,6 +2440,8 @@ fn NewLexer_(
         }

         pub fn rawTemplateContents(lexer: *LexerType) string {
+            lexer.assertNotJSON();
+
             var text: string = undefined;

             switch (lexer.token) {
@@ -2805,6 +2915,7 @@ pub fn rangeOfIdentifier(source: *const Source, loc: logger.Loc) logger.Range {
     const end = @intCast(u32, text.len);

     if (!iter.next(&cursor)) return r;
+    // Handle private names
     if (cursor.c == '#') {
         if (!iter.next(&cursor)) {
diff --git a/src/js_parser/js_parser.zig b/src/js_parser/js_parser.zig
index fefbee2f6..4c8591423 100644
--- a/src/js_parser/js_parser.zig
+++ b/src/js_parser/js_parser.zig
@@ -10765,7 +10765,7 @@ fn NewParser_(
             const target = try p.parseExprWithFlags(.member, flags);
             var args = ExprNodeList{};

-            if (is_typescript_enabled) {
+            if (comptime is_typescript_enabled) {
                 // Skip over TypeScript non-null assertions
                 if (p.lexer.token == .t_exclamation and !p.lexer.has_newline_before) {
                     try p.lexer.next();
@@ -10967,7 +10967,7 @@ fn NewParser_(
             // <A[]>(x)
             // <A>(x) => {}
             // <A = B>(x) => {}
-            if (is_typescript_enabled and is_jsx_enabled) {
+            if (comptime is_typescript_enabled and is_jsx_enabled) {
                 var oldLexer = std.mem.toBytes(p.lexer);

                 try p.lexer.next();
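The js_parser side applies the same discipline to branches that were already compile-time decidable: `is_typescript_enabled` is a comptime constant per parser instantiation, and writing `if (comptime ...)` makes that explicit, so the build fails loudly if the condition ever becomes runtime-known instead of quietly turning an eliminated branch into a runtime check. A reduced illustration (names are illustrative):

```zig
fn NewParser(comptime is_typescript_enabled: bool) type {
    return struct {
        fn skipNonNullAssertion(_: *@This()) u32 {
            if (comptime is_typescript_enabled) {
                // Only the TypeScript instantiation contains this path.
                return 1;
            }
            return 0;
        }
    };
}
```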