diff options
-rw-r--r-- | src/global.zig | 1 | ||||
-rw-r--r-- | src/js_ast.zig | 25 | ||||
-rw-r--r-- | src/js_lexer.zig | 608 | ||||
-rw-r--r-- | src/js_lexer_tables.zig | 2 | ||||
-rw-r--r-- | src/js_parser/js_parser.zig | 206 | ||||
-rw-r--r-- | src/js_printer.zig | 30 | ||||
-rw-r--r-- | src/logger.zig | 6 |
7 files changed, 789 insertions, 89 deletions
diff --git a/src/global.zig b/src/global.zig index acd9bc0a7..3bbcce18f 100644 --- a/src/global.zig +++ b/src/global.zig @@ -22,6 +22,7 @@ pub const isWindows = std.Target.current.os.tag == .windows; pub const enableTracing = true; pub const isDebug = std.builtin.Mode.Debug == std.builtin.mode; +pub const isTest = std.builtin.is_test; pub const Output = struct { var source: *Source = undefined; diff --git a/src/js_ast.zig b/src/js_ast.zig index bd2655289..71842789c 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -1058,16 +1058,20 @@ pub const Stmt = struct { } pub fn empty() Stmt { - return Stmt.init(&Stmt.None, logger.Loc.Empty); + return Stmt.init(Stmt.None, logger.Loc.Empty); } var None = S.Empty{}; pub fn init(origData: anytype, loc: logger.Loc) Stmt { - if (@typeInfo(@TypeOf(origData)) != .Pointer) { + if (@typeInfo(@TypeOf(origData)) != .Pointer and @TypeOf(origData) != S.Empty) { @compileError("Stmt.init needs a pointer."); } + if (@TypeOf(origData) == S.Empty) { + return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } }; + } + switch (@TypeOf(origData.*)) { S.Block => { return Stmt.comptime_init("s_block", S.Block, origData, loc); @@ -1210,7 +1214,7 @@ pub const Stmt = struct { return Stmt.comptime_alloc(allocator, "s_do_while", S.DoWhile, origData, loc); }, S.Empty => { - return Stmt.comptime_alloc(allocator, "s_empty", S.Empty, origData, loc); + return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } }; }, S.Enum => { return Stmt.comptime_alloc(allocator, "s_enum", S.Enum, origData, loc); @@ -1336,7 +1340,7 @@ pub const Stmt = struct { s_debugger: *S.Debugger, s_directive: *S.Directive, s_do_while: *S.DoWhile, - s_empty: *S.Empty, + s_empty: S.Empty, s_enum: *S.Enum, s_export_clause: *S.ExportClause, s_export_default: *S.ExportDefault, @@ -1382,7 +1386,12 @@ pub const Stmt = struct { pub const Expr = struct { loc: logger.Loc, data: Data, - + pub fn toEmpty(expr: *Expr) Expr { + return Expr{ .data = .{ .e_missing = E.Missing{} }, .loc = expr.loc }; + } + pub fn isEmpty(expr: *Expr) bool { + return std.meta.activeTag(expr.data) == .e_missing; + } pub const Query = struct { expr: Expr, loc: logger.Loc }; pub fn getProperty(expr: *const Expr, name: string) ?Query { @@ -1829,9 +1838,7 @@ pub const Expr = struct { return Expr{ .loc = loc, .data = Data{ .e_jsx_element = dat } }; }, E.Missing => { - var dat = allocator.create(E.Missing) catch unreachable; - dat.* = st; - return Expr{ .loc = loc, .data = Data{ .e_missing = dat } }; + return Expr{ .loc = loc, .data = Data{ .e_missing = E.Missing{} } }; }, E.Number => { var dat = allocator.create(E.Number) catch unreachable; @@ -2460,7 +2467,7 @@ pub const Expr = struct { e_import_identifier: *E.ImportIdentifier, e_private_identifier: *E.PrivateIdentifier, e_jsx_element: *E.JSXElement, - e_missing: *E.Missing, + e_missing: E.Missing, e_number: *E.Number, e_big_int: *E.BigInt, e_object: *E.Object, diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 96b6f6835..4b78f2536 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -191,25 +191,258 @@ pub const Lexer = struct { } } - fn parseStringLiteral(lexer: *LexerType) !void { - var quote: CodePoint = lexer.code_point; - var needs_slow_path = false; - var suffixLen: usize = 1; + pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, buf: anytype) !void { + var iter = CodepointIterator{ .bytes = text[start..], .i = 0 }; + const start_length = buf.items.len; + while (iter.nextCodepoint()) |c| { + const width = iter.width; - if (quote != '`') { - lexer.token = T.t_string_literal; - } else if (lexer.rescan_close_brace_as_template_token) { - lexer.token = T.t_template_tail; - } else { - lexer.token = T.t_no_substitution_template_literal; + switch (c) { + '\r' => { + // From the specification: + // + // 11.8.6.1 Static Semantics: TV and TRV + // + // TV excludes the code units of LineContinuation while TRV includes + // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to + // <LF> for both TV and TRV. An explicit EscapeSequence is needed to + // include a <CR> or <CR><LF> sequence. + + // Convert '\r\n' into '\n' + if (iter.i < text.len and text[iter.i] == '\n') { + iter.i += 1; + } + + // Convert '\r' into '\n' + buf.append('\n') catch unreachable; + continue; + }, + + '\\' => { + const c2 = iter.nextCodepoint() orelse return; + const width2 = iter.width; + switch (c2) { + 'b' => { + buf.append(std.mem.readIntNative(u16, "\\b")) catch unreachable; + continue; + }, + 'f' => { + buf.append(std.mem.readIntNative(u16, "\\f")) catch unreachable; + continue; + }, + 'n' => { + buf.append(std.mem.readIntNative(u16, "\\n")) catch unreachable; + continue; + }, + 'r' => { + buf.append(std.mem.readIntNative(u16, "\\r")) catch unreachable; + continue; + }, + 't' => { + buf.append(std.mem.readIntNative(u16, "\\t")) catch unreachable; + continue; + }, + 'v' => { + if (lexer.json_options != null) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + buf.append(std.mem.readIntNative(u16, "\\v")) catch unreachable; + continue; + }, + '0'...'7' => { + try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported."); + }, + '8', '9' => { + try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported."); + }, + 'x' => { + if (lexer.json_options != null) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + var value: CodePoint = 0; + var c3: CodePoint = 0; + var width3: u3 = 0; + comptime var j: usize = 0; + inline while (j < 2) : (j += 1) { + c3 = iter.nextCodepoint() orelse return lexer.syntaxError(); + width3 = iter.width; + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + } + iter.c = value; + }, + 'u' => { + // We're going to make this an i64 so we don't risk integer overflows + // when people do weird things + var value: i64 = 0; + + var c3 = iter.nextCodepoint() orelse return lexer.syntaxError(); + var width3 = iter.width; + + // variable-length + if (c3 == '{') { + if (lexer.json_options != null) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + const hex_start = iter.i - width - width2 - width3; + var is_first = true; + var is_out_of_range = false; + variableLength: while (true) { + c3 = iter.nextCodepoint() orelse break :variableLength; + + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + '}' => { + if (is_first) { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + } + break :variableLength; + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + // '\U0010FFFF + // copied from golang utf8.MaxRune + if (value > 1114111) { + is_out_of_range = true; + } + is_first = false; + } + + if (is_out_of_range) { + try lexer.addRangeError( + .{ .loc = .{ .start = @intCast(i32, start + hex_start) }, .len = @intCast(i32, (iter.i - hex_start)) }, + "Unicode escape sequence is out of range", + .{}, + true, + ); + return; + } + + // fixed-length + } else { + // Fixed-length + comptime var j: usize = 0; + inline while (j < 4) : (j += 1) { + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + if (j < 3) { + c3 = iter.nextCodepoint() orelse return lexer.syntaxError(); + width3 = iter.width; + } + } + } + + iter.c = @truncate(CodePoint, value); + }, + '\r' => { + if (lexer.json_options != null) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + // Ignore line continuations. A line continuation is not an escaped newline. + if (iter.i < text.len and text[iter.i + 1] == '\n') { + // Make sure Windows CRLF counts as a single newline + iter.i += 1; + } + continue; + }, + '\n', 0x2028, 0x2029 => { + if (lexer.json_options != null) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + // Ignore line continuations. A line continuation is not an escaped newline. + continue; + }, + else => { + if (lexer.json_options != null) { + switch (c2) { + '"', '\\', '/' => {}, + else => { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + }, + } + } + iter.c = c2; + }, + } + }, + else => {}, + } + + if (iter.c <= 0xFFFF) { + buf.append(@intCast(u16, c)) catch unreachable; + } else { + iter.c -= 0x10000; + buf.ensureUnusedCapacity(2) catch unreachable; + buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF))); + buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF))); + } } - try lexer.step(); + } + pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool }; + fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral { + var needs_slow_path = false; + var suffix_len: u3 = 1; stringLiteral: while (true) { switch (lexer.code_point) { '\\' => { - needs_slow_path = true; try lexer.step(); + // Skip slow path for \n in a string literal + // This is pretty common, shows up in e.g. React + // Example code: array.split("\n") + // We don't need to decode as UTF16 for that. We know it's just a newline char. + needs_slow_path = lexer.code_point != 'n'; // Handle Windows CRLF if (lexer.code_point == '\r' and lexer.json_options != null) { @@ -245,7 +478,7 @@ pub const Lexer = struct { if (quote == '`') { try lexer.step(); if (lexer.code_point == '{') { - suffixLen = 2; + suffix_len = 2; try lexer.step(); if (lexer.rescan_close_brace_as_template_token) { lexer.token = T.t_template_middle; @@ -257,12 +490,15 @@ pub const Lexer = struct { continue :stringLiteral; } }, + // exit condition + quote => { + try lexer.step(); + + break; + }, else => { - if (quote == lexer.code_point) { - try lexer.step(); - break :stringLiteral; - } + // Non-ASCII strings need the slow path if (lexer.code_point >= 0x80) { needs_slow_path = true; @@ -274,19 +510,41 @@ pub const Lexer = struct { try lexer.step(); } + return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len }; + } + + fn parseStringLiteral(lexer: *LexerType) !void { + var quote: CodePoint = lexer.code_point; + + if (quote != '`') { + lexer.token = T.t_string_literal; + } else if (lexer.rescan_close_brace_as_template_token) { + lexer.token = T.t_template_tail; + } else { + lexer.token = T.t_no_substitution_template_literal; + } + try lexer.step(); + + var string_literal_details = switch (quote) { + '`' => try lexer.parseStringLiteralInnter('`'), + '\'' => try lexer.parseStringLiteralInnter('\''), + '"' => try lexer.parseStringLiteralInnter('"'), + else => unreachable, + }; + // Reset string literal - lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen]; - lexer.string_literal_is_ascii = !needs_slow_path; + lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - string_literal_details.suffix_len]; + lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path; lexer.string_literal_buffer.shrinkRetainingCapacity(0); - if (needs_slow_path) { - lexer.string_literal_buffer.ensureTotalCapacity(lexer.string_literal_slice.len) catch unreachable; - var slice = lexer.string_literal_buffer.allocatedSlice(); - lexer.string_literal_buffer.items = slice[0..strings.toUTF16Buf(lexer.string_literal_slice, slice)]; + if (string_literal_details.needs_slow_path) { + lexer.string_literal_buffer.ensureUnusedCapacity(lexer.string_literal_slice.len) catch unreachable; + try lexer.decodeEscapeSequences(0, lexer.string_literal_slice, &lexer.string_literal_buffer); } if (quote == '\'' and lexer.json_options != null) { try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); } + // for (text) // // if (needs_slow_path) { // // // Slow path @@ -333,8 +591,131 @@ pub const Lexer = struct { return Error.SyntaxError; } - pub fn scanIdentifierWithEscapes(self: *LexerType) !void { - try self.addUnsupportedSyntaxError("escape sequence"); + pub const IdentifierKind = enum { normal, private }; + pub const ScanResult = struct { token: T, contents: string }; + threadlocal var small_escape_sequence_buffer: [4096]u16 = undefined; + const FakeArrayList16 = struct { + items: []u16, + i: usize = 0, + + pub fn append(fake: *FakeArrayList16, value: u16) !void { + std.debug.assert(fake.items.len < fake.i); + fake.items[fake.i] = value; + fake.i += 1; + } + + pub fn appendAssumeCapacity(fake: *FakeArrayList16, value: u16) void { + std.debug.assert(fake.items.len < fake.i); + fake.items[fake.i] = value; + fake.i += 1; + } + pub fn ensureUnusedCapacity(fake: *FakeArrayList16, int: anytype) !void { + std.debug.assert(fake.items.len < fake.i + int); + } + }; + threadlocal var large_escape_sequence_list: std.ArrayList(u16) = undefined; + threadlocal var large_escape_sequence_list_loaded: bool = false; + + // This is an edge case that doesn't really exist in the wild, so it doesn't + // need to be as fast as possible. + pub fn scanIdentifierWithEscapes(lexer: *LexerType, comptime kind: IdentifierKind) !ScanResult { + var result = ScanResult{ .token = .t_end_of_file, .contents = "" }; + // First pass: scan over the identifier to see how long it is + while (true) { + // Scan a unicode escape sequence. There is at least one because that's + // what caused us to get on this slow path in the first place. + if (lexer.code_point == '\\') { + try lexer.step(); + if (lexer.code_point != 'u') { + try lexer.syntaxError(); + } + try lexer.step(); + if (lexer.code_point == '{') { + // Variable-length + try lexer.step(); + while (lexer.code_point != '}') { + switch (lexer.code_point) { + '0'...'9', 'a'...'f', 'A'...'F' => { + try lexer.step(); + }, + else => { + try lexer.syntaxError(); + }, + } + } + + try lexer.step(); + } else { + // Fixed-length + comptime var j: usize = 0; + inline while (j < 4) : (j += 1) { + switch (lexer.code_point) { + '0'...'9', 'a'...'f', 'A'...'F' => { + try lexer.step(); + }, + else => { + try lexer.syntaxError(); + }, + } + } + } + continue; + } + + if (!isIdentifierContinue(lexer.code_point)) { + break; + } + try lexer.step(); + } + + // Second pass: re-use our existing escape sequence parser + var original_text = lexer.raw(); + if (original_text.len < 1024) { + var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 }; + try lexer.decodeEscapeSequences(lexer.start, original_text, &buf); + result.contents = lexer.utf16ToString(buf.items[0..buf.i]); + } else { + if (!large_escape_sequence_list_loaded) { + large_escape_sequence_list = try std.ArrayList(u16).initCapacity(lexer.allocator, original_text.len); + large_escape_sequence_list_loaded = true; + } + + large_escape_sequence_list.shrinkRetainingCapacity(0); + try lexer.decodeEscapeSequences(lexer.start, original_text, &large_escape_sequence_list); + result.contents = lexer.utf16ToString(large_escape_sequence_list.items); + } + + var identifier = result.contents; + if (kind == .private) { + identifier = result.contents[1..]; + } + + if (!isIdentifier(identifier)) { + try lexer.addRangeError( + .{ .loc = logger.usize2Loc(lexer.start), .len = @intCast(i32, lexer.end - lexer.start) }, + "Invalid identifier: \"{s}\"", + .{result.contents}, + true, + ); + } + result.contents = identifier; + + // Escaped keywords are not allowed to work as actual keywords, but they are + // allowed wherever we allow identifiers or keywords. For example: + // + // // This is an error (equivalent to "var var;") + // var \u0076\u0061\u0072; + // + // // This is an error (equivalent to "var foo;" except for this rule) + // \u0076\u0061\u0072 foo; + // + // // This is an fine (equivalent to "foo.var;") + // foo.\u0076\u0061\u0072; + // + result.token = if (Keywords.has(result.contents)) .t_escaped_keyword else .t_identifier; + + // const text = lexer.decodeEscapeSequences(lexer.start, lexer.raw(), ) + return result; } pub fn debugInfo(self: *LexerType) void { @@ -462,31 +843,46 @@ pub const Lexer = struct { '#' => { if (lexer.start == 0 and lexer.source.contents[1] == '!') { - try lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet."); - return; - } - - try lexer.step(); - if (!isIdentifierStart(lexer.code_point)) { - try lexer.syntaxError(); - } - try lexer.step(); - - if (isIdentifierStart(lexer.code_point)) { - try lexer.step(); - while (isIdentifierContinue(lexer.code_point)) { + // "#!/usr/bin/env node" + lexer.token = .t_hashbang; + hashbang: while (true) { try lexer.step(); + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + break :hashbang; + }, + -1 => { + break :hashbang; + }, + else => {}, + } } + lexer.identifier = lexer.raw(); + } else { + try lexer.step(); if (lexer.code_point == '\\') { - try lexer.scanIdentifierWithEscapes(); + const scan_result = try lexer.scanIdentifierWithEscapes(.private); + lexer.identifier = scan_result.contents; lexer.token = T.t_private_identifier; - - // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); } else { - lexer.token = T.t_private_identifier; - lexer.identifier = lexer.raw(); + if (!isIdentifierStart(lexer.code_point)) { + try lexer.syntaxError(); + } + + try lexer.step(); + while (isIdentifierContinue(lexer.code_point)) { + try lexer.step(); + } + if (lexer.code_point == '\\') { + const scan_result = try lexer.scanIdentifierWithEscapes(.private); + lexer.identifier = scan_result.contents; + lexer.token = T.t_private_identifier; + } else { + lexer.token = T.t_private_identifier; + lexer.identifier = lexer.raw(); + } + break; } - break; } }, '\r', '\n', 0x2028, 0x2029 => { @@ -966,7 +1362,9 @@ pub const Lexer = struct { } if (lexer.code_point == '\\') { - try lexer.scanIdentifierWithEscapes(); + const scan_result = try lexer.scanIdentifierWithEscapes(.normal); + lexer.identifier = scan_result.contents; + lexer.token = scan_result.token; } else { const contents = lexer.raw(); lexer.identifier = contents; @@ -975,8 +1373,9 @@ pub const Lexer = struct { }, '\\' => { - // TODO: normal - try lexer.scanIdentifierWithEscapes(); + const scan_result = try lexer.scanIdentifierWithEscapes(.normal); + lexer.identifier = scan_result.contents; + lexer.token = scan_result.token; }, '.', '0'...'9' => { @@ -996,8 +1395,9 @@ pub const Lexer = struct { try lexer.step(); } if (lexer.code_point == '\\') { - - // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier); + const scan_result = try lexer.scanIdentifierWithEscapes(.normal); + lexer.identifier = scan_result.contents; + lexer.token = scan_result.token; } else { lexer.token = T.t_identifier; lexer.identifier = lexer.raw(); @@ -2143,26 +2543,114 @@ pub fn isIdentifierUTF16(text: JavascriptString) bool { return true; } +pub const CodepointIterator = struct { + bytes: []const u8, + i: usize, + width: u3 = 0, + c: CodePoint = 0, + + pub fn nextCodepointSlice(it: *CodepointIterator) ?[]const u8 { + if (it.i >= it.bytes.len) { + return null; + } + + const cp_len = std + .unicode.utf8ByteSequenceLength(it.bytes[it.i]) catch unreachable; + it.i += cp_len; + return it.bytes[it.i - cp_len .. it.i]; + } + + pub fn nextCodepoint(it: *CodepointIterator) ?CodePoint { + const slice = it.nextCodepointSlice() orelse return null; + it.width = @intCast(u3, slice.len); + + it.c = switch (it.width) { + 1 => @intCast(CodePoint, slice[0]), + 2 => @intCast(CodePoint, std.unicode.utf8Decode2(slice) catch unreachable), + 3 => @intCast(CodePoint, std.unicode.utf8Decode3(slice) catch unreachable), + 4 => @intCast(CodePoint, std.unicode.utf8Decode4(slice) catch unreachable), + else => unreachable, + }; + + return it.c; + } + + /// Look ahead at the next n codepoints without advancing the iterator. + /// If fewer than n codepoints are available, then return the remainder of the string. + pub fn peek(it: *CodepointIterator, n: usize) []const u8 { + const original_i = it.i; + defer it.i = original_i; + + var end_ix = original_i; + var found: usize = 0; + while (found < n) : (found += 1) { + const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..]; + end_ix += next_codepoint.len; + } + + return it.bytes[original_i..end_ix]; + } +}; + // TODO: implement this to actually work right // this fn is a stub! pub fn rangeOfIdentifier(source: *const Source, loc: logger.Loc) logger.Range { + const text = source.contents[loc.toUsize()..]; var r = logger.Range{ .loc = loc, .len = 0 }; - const offset = @intCast(usize, loc.start); - var i: usize = 0; - for (source.contents[offset..]) |c| { - if (isIdentifierStart(@as(CodePoint, c))) { - for (source.contents[offset + i ..]) |c_| { - if (!isIdentifierContinue(c_)) { - r.len = std.math.lossyCast(i32, i); - return r; + if (text.len == 0) { + return r; + } + + var iter = CodepointIterator{ .bytes = text, .i = 0 }; + var c = @intCast(CodePoint, iter.nextCodepoint() orelse unreachable); + + // Handle private names + if (c == '#') { + c = @intCast(CodePoint, iter.nextCodepoint() orelse { + r.len = 1; + return r; + }); + } + + if (isIdentifierStart(c) or c == '\\') { + defer r.len = @intCast(i32, iter.i); + while (iter.nextCodepoint()) |code_point| { + if (code_point == '\\') { + // Search for the end of the identifier + + // Skip over bracketed unicode escapes such as "\u{10000}" + if (iter.i + 2 < text.len and text[iter.i + 1] == 'u' and text[iter.i + 2] == '{') { + iter.i += 2; + while (iter.i < text.len) { + if (text[iter.i] == '}') { + iter.i += 1; + break; + } + iter.i += 1; + } } - i += 1; + } else if (!isIdentifierContinue(code_point)) { + return r; } } - - i += 1; } + // const offset = @intCast(usize, loc.start); + // var i: usize = 0; + // for (text) |c| { + // if (isIdentifierStart(@as(CodePoint, c))) { + // for (source.contents[offset + i ..]) |c_| { + // if (!isIdentifierContinue(c_)) { + // r.len = std.math.lossyCast(i32, i); + // return r; + // } + // i += 1; + // } + // } + + // i += 1; + // } + return r; } diff --git a/src/js_lexer_tables.zig b/src/js_lexer_tables.zig index c6a5d4954..d373cb0b0 100644 --- a/src/js_lexer_tables.zig +++ b/src/js_lexer_tables.zig @@ -201,7 +201,7 @@ pub const StrictModeReservedWords = std.ComptimeStringMap(bool, .{ .{ "yield", true }, }); -pub const CodePoint = i22; +pub const CodePoint = i32; pub const PropertyModifierKeyword = enum { p_abstract, diff --git a/src/js_parser/js_parser.zig b/src/js_parser/js_parser.zig index 7fb347c19..7a859a3a5 100644 --- a/src/js_parser/js_parser.zig +++ b/src/js_parser/js_parser.zig @@ -520,6 +520,155 @@ pub const SideEffects = enum { } } + pub fn simpifyUnusedExpr(p: *P, expr: Expr) ?Expr { + switch (expr.data) { + .e_null, .e_undefined, .e_missing, .e_boolean, .e_number, .e_big_int, .e_string, .e_this, .e_reg_exp, .e_function, .e_arrow, .e_import_meta => { + return null; + }, + + .e_dot => |dot| { + if (dot.can_be_removed_if_unused) { + return null; + } + }, + .e_identifier => |ident| { + if (ident.must_keep_due_to_with_stmt) { + return expr; + } + + if (ident.can_be_removed_if_unused or p.symbols.items[ident.ref.inner_index].kind != .unbound) { + return null; + } + }, + .e_if => |__if__| { + __if__.yes = simpifyUnusedExpr(p, __if__.yes) orelse __if__.yes.toEmpty(); + __if__.no = simpifyUnusedExpr(p, __if__.no) orelse __if__.no.toEmpty(); + + // "foo() ? 1 : 2" => "foo()" + if (__if__.yes.isEmpty() and __if__.no.isEmpty()) { + return simpifyUnusedExpr(p, __if__.test_); + } + }, + + .e_call => |call| { + // A call that has been marked "__PURE__" can be removed if all arguments + // can be removed. The annotation causes us to ignore the target. + if (call.can_be_unwrapped_if_unused) { + return Expr.joinAllWithComma(call.args, p.allocator); + } + }, + + .e_binary => |bin| { + switch (bin.op) { + // We can simplify "==" and "!=" even though they can call "toString" and/or + // "valueOf" if we can statically determine that the types of both sides are + // primitives. In that case there won't be any chance for user-defined + // "toString" and/or "valueOf" to be called. + .bin_loose_eq, .bin_loose_ne => { + if (isPrimitiveWithSideEffects(bin.left.data) and isPrimitiveWithSideEffects(bin.right.data)) { + return Expr.joinWithComma(simpifyUnusedExpr(p, bin.left) orelse bin.left.toEmpty(), simpifyUnusedExpr(p, bin.right) orelse bin.right.toEmpty(), p.allocator); + } + }, + else => {}, + } + }, + + .e_new => |call| { + // A constructor call that has been marked "__PURE__" can be removed if all arguments + // can be removed. The annotation causes us to ignore the target. + if (call.can_be_unwrapped_if_unused) { + return Expr.joinAllWithComma(call.args, p.allocator); + } + }, + else => {}, + } + + return expr; + } + + // If this is in a dead branch, then we want to trim as much dead code as we + // can. Everything can be trimmed except for hoisted declarations ("var" and + // "function"), which affect the parent scope. For example: + // + // function foo() { + // if (false) { var x; } + // x = 1; + // } + // + // We can't trim the entire branch as dead or calling foo() will incorrectly + // assign to a global variable instead. + + // The main goal here is to trim conditionals + pub fn shouldKeepStmtInDeadControlFlow(stmt: Stmt) bool { + switch (stmt.data) { + .s_empty, .s_expr, .s_throw, .s_return, .s_break, .s_continue, .s_class, .s_debugger => { + // Omit these statements entirely + return false; + }, + + .s_local => |local| { + return local.kind != .k_var; + // if (local.kind != .k_var) { + // // Omit these statements entirely + // return false; + // } + }, + + .s_block => |block| { + for (block.stmts) |child| { + if (shouldKeepStmtInDeadControlFlow(child)) { + return true; + } + } + + return false; + }, + + .s_if => |_if_| { + if (shouldKeepStmtInDeadControlFlow(_if_.yes)) { + return true; + } + + const no = _if_.no orelse return false; + + return shouldKeepStmtInDeadControlFlow(no); + }, + + .s_while => |__while__| { + return shouldKeepStmtInDeadControlFlow(__while__.body); + }, + + .s_do_while => |__while__| { + return shouldKeepStmtInDeadControlFlow(__while__.body); + }, + + .s_for => |__for__| { + if (__for__.init) |init_| { + if (shouldKeepStmtInDeadControlFlow(init_)) { + return true; + } + } + + return shouldKeepStmtInDeadControlFlow(__for__.body); + }, + + .s_for_in => |__for__| { + return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body); + }, + + .s_for_of => |__for__| { + return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body); + }, + + .s_label => |label| { + return shouldKeepStmtInDeadControlFlow(label.stmt); + }, + else => { + return true; + }, + } + } + pub const Equality = struct { equal: bool = false, ok: bool = false }; // Returns "equal, ok". If "ok" is false, then nothing is known about the two @@ -642,9 +791,10 @@ pub const SideEffects = enum { .bin_comma => { return isPrimitiveWithSideEffects(e.right.data); }, + else => {}, } }, - .e_if => { + .e_if => |e| { return isPrimitiveWithSideEffects(e.yes.data) and isPrimitiveWithSideEffects(e.no.data); }, else => {}, @@ -1283,6 +1433,14 @@ pub const Parser = struct { var result: js_ast.Result = undefined; if (self.p) |p| { + + // Consume a leading hashbang comment + var hashbang: string = ""; + if (p.lexer.token == .t_hashbang) { + hashbang = p.lexer.identifier; + try p.lexer.next(); + } + // Parse the file in the first pass, but do not bind symbols var opts = ParseStatementOptions{ .is_module_scope = true }; debugl("<p.parseStmtsUpTo>"); @@ -1499,8 +1657,8 @@ const ParseStatementOptions = struct { var e_missing_data = E.Missing{}; var s_missing = S.Empty{}; -var nullExprData = Expr.Data{ .e_missing = &e_missing_data }; -var nullStmtData = Stmt.Data{ .s_empty = &s_missing }; +var nullExprData = Expr.Data{ .e_missing = e_missing_data }; +var nullStmtData = Stmt.Data{ .s_empty = s_missing }; pub const Prefill = struct { pub const StringLiteral = struct { pub var Key = [3]u16{ 'k', 'e', 'y' }; @@ -1523,10 +1681,10 @@ pub const Prefill = struct { pub var BMissing = B{ .b_missing = &BMissing_ }; pub var BMissing_ = B.Missing{}; - pub var EMissing = Expr.Data{ .e_missing = &EMissing_ }; + pub var EMissing = Expr.Data{ .e_missing = EMissing_ }; pub var EMissing_ = E.Missing{}; - pub var SEmpty = Stmt.Data{ .s_empty = &SEmpty_ }; + pub var SEmpty = Stmt.Data{ .s_empty = SEmpty_ }; pub var SEmpty_ = S.Empty{}; pub var Filename = Expr.Data{ .e_string = &Prefill.String.Filename }; @@ -4032,7 +4190,7 @@ pub const P = struct { const name = p.lexer.identifier; var emiss = E.Missing{}; // Parse either an async function, an async expression, or a normal expression - var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = &emiss } }; + var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = emiss } }; if (is_identifier and strings.eqlComptime(p.lexer.raw(), "async")) { var async_range = p.lexer.range(); try p.lexer.next(); @@ -4589,7 +4747,7 @@ pub const P = struct { const name = p.lexer.identifier; const loc = p.lexer.loc(); - const e_str = p.lexer.toEString(); + const e_str = E.String{ .utf8 = name }; if (!p.lexer.isIdentifierOrKeyword()) { try p.lexer.expect(.t_identifier); @@ -7262,7 +7420,7 @@ pub const P = struct { } return p.e(E.Array{ .items = items.toOwnedSlice(), - .comma_after_spread = comma_after_spread, + .comma_after_spread = comma_after_spread.toNullable(), .is_single_line = is_single_line, }, loc); }, @@ -7325,7 +7483,7 @@ pub const P = struct { } return p.e(E.Object{ .properties = properties.toOwnedSlice(), - .comma_after_spread = comma_after_spread, + .comma_after_spread = comma_after_spread.toNullable(), .is_single_line = is_single_line, }, loc); }, @@ -9707,11 +9865,8 @@ pub const P = struct { .s_expr => |data| { p.stmt_expr_value = data.value.data; data.value = p.visitExpr(data.value); - - // TODO: - // if (p.options.mangle_syntax) { - - // } + // simplify unused + data.value = SideEffects.simpifyUnusedExpr(p, data.value) orelse data.value.toEmpty(); }, .s_throw => |data| { data.value = p.visitExpr(data.value); @@ -10622,9 +10777,10 @@ pub const P = struct { // Save the current control-flow liveness. This represents if we are // currently inside an "if (false) { ... }" block. var old_is_control_flow_dead = p.is_control_flow_dead; + defer p.is_control_flow_dead = old_is_control_flow_dead; // visit all statements first - var visited = List(Stmt).init(p.allocator); + var visited = try List(Stmt).initCapacity(p.allocator, stmts.items.len); var before = List(Stmt).init(p.allocator); var after = List(Stmt).init(p.allocator); defer before.deinit(); @@ -10657,8 +10813,21 @@ pub const P = struct { try p.visitAndAppendStmt(list, stmt); } - p.is_control_flow_dead = old_is_control_flow_dead; - try stmts.resize(visited.items.len + before.items.len + after.items.len); + var visited_count = visited.items.len; + if (p.is_control_flow_dead) { + var end: usize = 0; + for (visited.items) |item, i| { + if (!SideEffects.shouldKeepStmtInDeadControlFlow(item)) { + continue; + } + + visited.items[end] = item; + end += 1; + } + visited_count = end; + } + + try stmts.resize(visited_count + before.items.len + after.items.len); var i: usize = 0; for (before.items) |item| { @@ -10666,7 +10835,8 @@ pub const P = struct { i += 1; } - for (visited.items) |item| { + const visited_slice = visited.items[0..visited_count]; + for (visited_slice) |item| { stmts.items[i] = item; i += 1; } diff --git a/src/js_printer.zig b/src/js_printer.zig index e72eefbde..8170ff9ad 100644 --- a/src/js_printer.zig +++ b/src/js_printer.zig @@ -199,24 +199,54 @@ pub fn NewPrinter(comptime ascii_only: bool) type { p.js.appendChar(str) catch unreachable; }, string => { + if (isDebug or isTest) { + if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.append(str) catch unreachable; }, u8 => { + if (isDebug or isTest) { + if (str == 0) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.appendChar(str) catch unreachable; }, u16 => { + if (isDebug or isTest) { + if (str == 0) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.appendChar(@intCast(u8, str)) catch unreachable; }, u21 => { + if (isDebug or isTest) { + if (str == 0) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.appendChar(@intCast(u8, str)) catch unreachable; }, else => { + if (isDebug or isTest) { + if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.append(@as(string, str)) catch unreachable; }, } } pub fn unsafePrint(p: *Printer, str: string) void { + if (isDebug or isTest) { + if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) { + Global.panic("Attempted to print null char", .{}); + } + } p.js.appendAssumeCapacity(str); } diff --git a/src/logger.zig b/src/logger.zig index 2584566eb..8d282ee13 100644 --- a/src/logger.zig +++ b/src/logger.zig @@ -30,8 +30,12 @@ pub const Kind = enum { pub const Loc = packed struct { start: i32 = -1, + pub fn toNullable(loc: *Loc) ?Loc { + return if (loc.start == -1) null else loc.*; + } + // TODO: remove this stupidity - pub fn toUsize(self: *Loc) usize { + pub fn toUsize(self: *const Loc) usize { return @intCast(usize, self.start); } |