aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/global.zig1
-rw-r--r--src/js_ast.zig25
-rw-r--r--src/js_lexer.zig608
-rw-r--r--src/js_lexer_tables.zig2
-rw-r--r--src/js_parser/js_parser.zig206
-rw-r--r--src/js_printer.zig30
-rw-r--r--src/logger.zig6
7 files changed, 789 insertions, 89 deletions
diff --git a/src/global.zig b/src/global.zig
index acd9bc0a7..3bbcce18f 100644
--- a/src/global.zig
+++ b/src/global.zig
@@ -22,6 +22,7 @@ pub const isWindows = std.Target.current.os.tag == .windows;
pub const enableTracing = true;
pub const isDebug = std.builtin.Mode.Debug == std.builtin.mode;
+pub const isTest = std.builtin.is_test;
pub const Output = struct {
var source: *Source = undefined;
diff --git a/src/js_ast.zig b/src/js_ast.zig
index bd2655289..71842789c 100644
--- a/src/js_ast.zig
+++ b/src/js_ast.zig
@@ -1058,16 +1058,20 @@ pub const Stmt = struct {
}
pub fn empty() Stmt {
- return Stmt.init(&Stmt.None, logger.Loc.Empty);
+ return Stmt.init(Stmt.None, logger.Loc.Empty);
}
var None = S.Empty{};
pub fn init(origData: anytype, loc: logger.Loc) Stmt {
- if (@typeInfo(@TypeOf(origData)) != .Pointer) {
+ if (@typeInfo(@TypeOf(origData)) != .Pointer and @TypeOf(origData) != S.Empty) {
@compileError("Stmt.init needs a pointer.");
}
+ if (@TypeOf(origData) == S.Empty) {
+ return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } };
+ }
+
switch (@TypeOf(origData.*)) {
S.Block => {
return Stmt.comptime_init("s_block", S.Block, origData, loc);
@@ -1210,7 +1214,7 @@ pub const Stmt = struct {
return Stmt.comptime_alloc(allocator, "s_do_while", S.DoWhile, origData, loc);
},
S.Empty => {
- return Stmt.comptime_alloc(allocator, "s_empty", S.Empty, origData, loc);
+ return Stmt{ .loc = loc, .data = Data{ .s_empty = S.Empty{} } };
},
S.Enum => {
return Stmt.comptime_alloc(allocator, "s_enum", S.Enum, origData, loc);
@@ -1336,7 +1340,7 @@ pub const Stmt = struct {
s_debugger: *S.Debugger,
s_directive: *S.Directive,
s_do_while: *S.DoWhile,
- s_empty: *S.Empty,
+ s_empty: S.Empty,
s_enum: *S.Enum,
s_export_clause: *S.ExportClause,
s_export_default: *S.ExportDefault,
@@ -1382,7 +1386,12 @@ pub const Stmt = struct {
pub const Expr = struct {
loc: logger.Loc,
data: Data,
-
+ pub fn toEmpty(expr: *Expr) Expr {
+ return Expr{ .data = .{ .e_missing = E.Missing{} }, .loc = expr.loc };
+ }
+ pub fn isEmpty(expr: *Expr) bool {
+ return std.meta.activeTag(expr.data) == .e_missing;
+ }
pub const Query = struct { expr: Expr, loc: logger.Loc };
pub fn getProperty(expr: *const Expr, name: string) ?Query {
@@ -1829,9 +1838,7 @@ pub const Expr = struct {
return Expr{ .loc = loc, .data = Data{ .e_jsx_element = dat } };
},
E.Missing => {
- var dat = allocator.create(E.Missing) catch unreachable;
- dat.* = st;
- return Expr{ .loc = loc, .data = Data{ .e_missing = dat } };
+ return Expr{ .loc = loc, .data = Data{ .e_missing = E.Missing{} } };
},
E.Number => {
var dat = allocator.create(E.Number) catch unreachable;
@@ -2460,7 +2467,7 @@ pub const Expr = struct {
e_import_identifier: *E.ImportIdentifier,
e_private_identifier: *E.PrivateIdentifier,
e_jsx_element: *E.JSXElement,
- e_missing: *E.Missing,
+ e_missing: E.Missing,
e_number: *E.Number,
e_big_int: *E.BigInt,
e_object: *E.Object,
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 96b6f6835..4b78f2536 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -191,25 +191,258 @@ pub const Lexer = struct {
}
}
- fn parseStringLiteral(lexer: *LexerType) !void {
- var quote: CodePoint = lexer.code_point;
- var needs_slow_path = false;
- var suffixLen: usize = 1;
+ pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, buf: anytype) !void {
+ var iter = CodepointIterator{ .bytes = text[start..], .i = 0 };
+ const start_length = buf.items.len;
+ while (iter.nextCodepoint()) |c| {
+ const width = iter.width;
- if (quote != '`') {
- lexer.token = T.t_string_literal;
- } else if (lexer.rescan_close_brace_as_template_token) {
- lexer.token = T.t_template_tail;
- } else {
- lexer.token = T.t_no_substitution_template_literal;
+ switch (c) {
+ '\r' => {
+ // From the specification:
+ //
+ // 11.8.6.1 Static Semantics: TV and TRV
+ //
+ // TV excludes the code units of LineContinuation while TRV includes
+ // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
+ // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
+ // include a <CR> or <CR><LF> sequence.
+
+ // Convert '\r\n' into '\n'
+ if (iter.i < text.len and text[iter.i] == '\n') {
+ iter.i += 1;
+ }
+
+ // Convert '\r' into '\n'
+ buf.append('\n') catch unreachable;
+ continue;
+ },
+
+ '\\' => {
+ const c2 = iter.nextCodepoint() orelse return;
+ const width2 = iter.width;
+ switch (c2) {
+ 'b' => {
+ buf.append(std.mem.readIntNative(u16, "\\b")) catch unreachable;
+ continue;
+ },
+ 'f' => {
+ buf.append(std.mem.readIntNative(u16, "\\f")) catch unreachable;
+ continue;
+ },
+ 'n' => {
+ buf.append(std.mem.readIntNative(u16, "\\n")) catch unreachable;
+ continue;
+ },
+ 'r' => {
+ buf.append(std.mem.readIntNative(u16, "\\r")) catch unreachable;
+ continue;
+ },
+ 't' => {
+ buf.append(std.mem.readIntNative(u16, "\\t")) catch unreachable;
+ continue;
+ },
+ 'v' => {
+ if (lexer.json_options != null) {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ }
+
+ buf.append(std.mem.readIntNative(u16, "\\v")) catch unreachable;
+ continue;
+ },
+ '0'...'7' => {
+ try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported.");
+ },
+ '8', '9' => {
+ try lexer.addUnsupportedSyntaxError("Legacy octal literals are not supported.");
+ },
+ 'x' => {
+ if (lexer.json_options != null) {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ }
+
+ var value: CodePoint = 0;
+ var c3: CodePoint = 0;
+ var width3: u3 = 0;
+ comptime var j: usize = 0;
+ inline while (j < 2) : (j += 1) {
+ c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
+ width3 = iter.width;
+ switch (c3) {
+ '0'...'9' => {
+ value = value * 16 | (c3 - '0');
+ },
+ 'a'...'f' => {
+ value = value * 16 | (c3 + 10 - 'a');
+ },
+ 'A'...'F' => {
+ value = value * 16 | (c3 + 10 - 'A');
+ },
+ else => {
+ lexer.end = start + iter.i - width3;
+ return lexer.syntaxError();
+ },
+ }
+ }
+ iter.c = value;
+ },
+ 'u' => {
+ // We're going to make this an i64 so we don't risk integer overflows
+ // when people do weird things
+ var value: i64 = 0;
+
+ var c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
+ var width3 = iter.width;
+
+ // variable-length
+ if (c3 == '{') {
+ if (lexer.json_options != null) {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ }
+
+ const hex_start = iter.i - width - width2 - width3;
+ var is_first = true;
+ var is_out_of_range = false;
+ variableLength: while (true) {
+ c3 = iter.nextCodepoint() orelse break :variableLength;
+
+ switch (c3) {
+ '0'...'9' => {
+ value = value * 16 | (c3 - '0');
+ },
+ 'a'...'f' => {
+ value = value * 16 | (c3 + 10 - 'a');
+ },
+ 'A'...'F' => {
+ value = value * 16 | (c3 + 10 - 'A');
+ },
+ '}' => {
+ if (is_first) {
+ lexer.end = start + iter.i - width3;
+ return lexer.syntaxError();
+ }
+ break :variableLength;
+ },
+ else => {
+ lexer.end = start + iter.i - width3;
+ return lexer.syntaxError();
+ },
+ }
+
+ // 1114111 == 0x10FFFF ('\u{10FFFF}'), the maximum valid Unicode code point
+ // (copied from Go's utf8.MaxRune)
+ if (value > 1114111) {
+ is_out_of_range = true;
+ }
+ is_first = false;
+ }
+
+ if (is_out_of_range) {
+ try lexer.addRangeError(
+ .{ .loc = .{ .start = @intCast(i32, start + hex_start) }, .len = @intCast(i32, (iter.i - hex_start)) },
+ "Unicode escape sequence is out of range",
+ .{},
+ true,
+ );
+ return;
+ }
+
+ // fixed-length
+ } else {
+ // Fixed-length
+ comptime var j: usize = 0;
+ inline while (j < 4) : (j += 1) {
+ switch (c3) {
+ '0'...'9' => {
+ value = value * 16 | (c3 - '0');
+ },
+ 'a'...'f' => {
+ value = value * 16 | (c3 + 10 - 'a');
+ },
+ 'A'...'F' => {
+ value = value * 16 | (c3 + 10 - 'A');
+ },
+ else => {
+ lexer.end = start + iter.i - width3;
+ return lexer.syntaxError();
+ },
+ }
+
+ if (j < 3) {
+ c3 = iter.nextCodepoint() orelse return lexer.syntaxError();
+ width3 = iter.width;
+ }
+ }
+ }
+
+ iter.c = @truncate(CodePoint, value);
+ },
+ '\r' => {
+ if (lexer.json_options != null) {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ }
+
+ // Ignore line continuations. A line continuation is not an escaped newline.
+ if (iter.i < text.len and text[iter.i + 1] == '\n') {
+ // Make sure Windows CRLF counts as a single newline
+ iter.i += 1;
+ }
+ continue;
+ },
+ '\n', 0x2028, 0x2029 => {
+ if (lexer.json_options != null) {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ }
+
+ // Ignore line continuations. A line continuation is not an escaped newline.
+ continue;
+ },
+ else => {
+ if (lexer.json_options != null) {
+ switch (c2) {
+ '"', '\\', '/' => {},
+ else => {
+ lexer.end = start + iter.i - width2;
+ try lexer.syntaxError();
+ },
+ }
+ }
+ iter.c = c2;
+ },
+ }
+ },
+ else => {},
+ }
+
+ if (iter.c <= 0xFFFF) {
+ buf.append(@intCast(u16, c)) catch unreachable;
+ } else {
+ iter.c -= 0x10000;
+ buf.ensureUnusedCapacity(2) catch unreachable;
+ buf.appendAssumeCapacity(@intCast(u16, 0xD800 + ((iter.c >> 10) & 0x3FF)));
+ buf.appendAssumeCapacity(@intCast(u16, 0xDC00 + (iter.c & 0x3FF)));
+ }
}
- try lexer.step();
+ }
+ pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool };
+ fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral {
+ var needs_slow_path = false;
+ var suffix_len: u3 = 1;
stringLiteral: while (true) {
switch (lexer.code_point) {
'\\' => {
- needs_slow_path = true;
try lexer.step();
+ // Skip slow path for \n in a string literal
+ // This is pretty common, shows up in e.g. React
+ // Example code: array.split("\n")
+ // We don't need to decode as UTF16 for that. We know it's just a newline char.
+ needs_slow_path = lexer.code_point != 'n';
// Handle Windows CRLF
if (lexer.code_point == '\r' and lexer.json_options != null) {
@@ -245,7 +478,7 @@ pub const Lexer = struct {
if (quote == '`') {
try lexer.step();
if (lexer.code_point == '{') {
- suffixLen = 2;
+ suffix_len = 2;
try lexer.step();
if (lexer.rescan_close_brace_as_template_token) {
lexer.token = T.t_template_middle;
@@ -257,12 +490,15 @@ pub const Lexer = struct {
continue :stringLiteral;
}
},
+ // exit condition
+ quote => {
+ try lexer.step();
+
+ break;
+ },
else => {
- if (quote == lexer.code_point) {
- try lexer.step();
- break :stringLiteral;
- }
+
// Non-ASCII strings need the slow path
if (lexer.code_point >= 0x80) {
needs_slow_path = true;
@@ -274,19 +510,41 @@ pub const Lexer = struct {
try lexer.step();
}
+ return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len };
+ }
+
+ fn parseStringLiteral(lexer: *LexerType) !void {
+ var quote: CodePoint = lexer.code_point;
+
+ if (quote != '`') {
+ lexer.token = T.t_string_literal;
+ } else if (lexer.rescan_close_brace_as_template_token) {
+ lexer.token = T.t_template_tail;
+ } else {
+ lexer.token = T.t_no_substitution_template_literal;
+ }
+ try lexer.step();
+
+ var string_literal_details = switch (quote) {
+ '`' => try lexer.parseStringLiteralInnter('`'),
+ '\'' => try lexer.parseStringLiteralInnter('\''),
+ '"' => try lexer.parseStringLiteralInnter('"'),
+ else => unreachable,
+ };
+
// Reset string literal
- lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - suffixLen];
- lexer.string_literal_is_ascii = !needs_slow_path;
+ lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - string_literal_details.suffix_len];
+ lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path;
lexer.string_literal_buffer.shrinkRetainingCapacity(0);
- if (needs_slow_path) {
- lexer.string_literal_buffer.ensureTotalCapacity(lexer.string_literal_slice.len) catch unreachable;
- var slice = lexer.string_literal_buffer.allocatedSlice();
- lexer.string_literal_buffer.items = slice[0..strings.toUTF16Buf(lexer.string_literal_slice, slice)];
+ if (string_literal_details.needs_slow_path) {
+ lexer.string_literal_buffer.ensureUnusedCapacity(lexer.string_literal_slice.len) catch unreachable;
+ try lexer.decodeEscapeSequences(0, lexer.string_literal_slice, &lexer.string_literal_buffer);
}
if (quote == '\'' and lexer.json_options != null) {
try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true);
}
+
// for (text)
// // if (needs_slow_path) {
// // // Slow path
@@ -333,8 +591,131 @@ pub const Lexer = struct {
return Error.SyntaxError;
}
- pub fn scanIdentifierWithEscapes(self: *LexerType) !void {
- try self.addUnsupportedSyntaxError("escape sequence");
+ pub const IdentifierKind = enum { normal, private };
+ pub const ScanResult = struct { token: T, contents: string };
+ threadlocal var small_escape_sequence_buffer: [4096]u16 = undefined;
+ const FakeArrayList16 = struct {
+ items: []u16,
+ i: usize = 0,
+
+ pub fn append(fake: *FakeArrayList16, value: u16) !void {
+ std.debug.assert(fake.items.len < fake.i);
+ fake.items[fake.i] = value;
+ fake.i += 1;
+ }
+
+ pub fn appendAssumeCapacity(fake: *FakeArrayList16, value: u16) void {
+ std.debug.assert(fake.items.len < fake.i);
+ fake.items[fake.i] = value;
+ fake.i += 1;
+ }
+ pub fn ensureUnusedCapacity(fake: *FakeArrayList16, int: anytype) !void {
+ std.debug.assert(fake.items.len < fake.i + int);
+ }
+ };
+ threadlocal var large_escape_sequence_list: std.ArrayList(u16) = undefined;
+ threadlocal var large_escape_sequence_list_loaded: bool = false;
+
+ // This is an edge case that doesn't really exist in the wild, so it doesn't
+ // need to be as fast as possible.
+ pub fn scanIdentifierWithEscapes(lexer: *LexerType, comptime kind: IdentifierKind) !ScanResult {
+ var result = ScanResult{ .token = .t_end_of_file, .contents = "" };
+ // First pass: scan over the identifier to see how long it is
+ while (true) {
+ // Scan a unicode escape sequence. There is at least one because that's
+ // what caused us to get on this slow path in the first place.
+ if (lexer.code_point == '\\') {
+ try lexer.step();
+ if (lexer.code_point != 'u') {
+ try lexer.syntaxError();
+ }
+ try lexer.step();
+ if (lexer.code_point == '{') {
+ // Variable-length
+ try lexer.step();
+ while (lexer.code_point != '}') {
+ switch (lexer.code_point) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {
+ try lexer.step();
+ },
+ else => {
+ try lexer.syntaxError();
+ },
+ }
+ }
+
+ try lexer.step();
+ } else {
+ // Fixed-length
+ comptime var j: usize = 0;
+ inline while (j < 4) : (j += 1) {
+ switch (lexer.code_point) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {
+ try lexer.step();
+ },
+ else => {
+ try lexer.syntaxError();
+ },
+ }
+ }
+ }
+ continue;
+ }
+
+ if (!isIdentifierContinue(lexer.code_point)) {
+ break;
+ }
+ try lexer.step();
+ }
+
+ // Second pass: re-use our existing escape sequence parser
+ var original_text = lexer.raw();
+ if (original_text.len < 1024) {
+ var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 };
+ try lexer.decodeEscapeSequences(lexer.start, original_text, &buf);
+ result.contents = lexer.utf16ToString(buf.items[0..buf.i]);
+ } else {
+ if (!large_escape_sequence_list_loaded) {
+ large_escape_sequence_list = try std.ArrayList(u16).initCapacity(lexer.allocator, original_text.len);
+ large_escape_sequence_list_loaded = true;
+ }
+
+ large_escape_sequence_list.shrinkRetainingCapacity(0);
+ try lexer.decodeEscapeSequences(lexer.start, original_text, &large_escape_sequence_list);
+ result.contents = lexer.utf16ToString(large_escape_sequence_list.items);
+ }
+
+ var identifier = result.contents;
+ if (kind == .private) {
+ identifier = result.contents[1..];
+ }
+
+ if (!isIdentifier(identifier)) {
+ try lexer.addRangeError(
+ .{ .loc = logger.usize2Loc(lexer.start), .len = @intCast(i32, lexer.end - lexer.start) },
+ "Invalid identifier: \"{s}\"",
+ .{result.contents},
+ true,
+ );
+ }
+ result.contents = identifier;
+
+ // Escaped keywords are not allowed to work as actual keywords, but they are
+ // allowed wherever we allow identifiers or keywords. For example:
+ //
+ // // This is an error (equivalent to "var var;")
+ // var \u0076\u0061\u0072;
+ //
+ // // This is an error (equivalent to "var foo;" except for this rule)
+ // \u0076\u0061\u0072 foo;
+ //
+ // // This is fine (equivalent to "foo.var;")
+ // foo.\u0076\u0061\u0072;
+ //
+ result.token = if (Keywords.has(result.contents)) .t_escaped_keyword else .t_identifier;
+
+ // const text = lexer.decodeEscapeSequences(lexer.start, lexer.raw(), )
+ return result;
}
pub fn debugInfo(self: *LexerType) void {
@@ -462,31 +843,46 @@ pub const Lexer = struct {
'#' => {
if (lexer.start == 0 and lexer.source.contents[1] == '!') {
- try lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet.");
- return;
- }
-
- try lexer.step();
- if (!isIdentifierStart(lexer.code_point)) {
- try lexer.syntaxError();
- }
- try lexer.step();
-
- if (isIdentifierStart(lexer.code_point)) {
- try lexer.step();
- while (isIdentifierContinue(lexer.code_point)) {
+ // "#!/usr/bin/env node"
+ lexer.token = .t_hashbang;
+ hashbang: while (true) {
try lexer.step();
+ switch (lexer.code_point) {
+ '\r', '\n', 0x2028, 0x2029 => {
+ break :hashbang;
+ },
+ -1 => {
+ break :hashbang;
+ },
+ else => {},
+ }
}
+ lexer.identifier = lexer.raw();
+ } else {
+ try lexer.step();
if (lexer.code_point == '\\') {
- try lexer.scanIdentifierWithEscapes();
+ const scan_result = try lexer.scanIdentifierWithEscapes(.private);
+ lexer.identifier = scan_result.contents;
lexer.token = T.t_private_identifier;
-
- // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
} else {
- lexer.token = T.t_private_identifier;
- lexer.identifier = lexer.raw();
+ if (!isIdentifierStart(lexer.code_point)) {
+ try lexer.syntaxError();
+ }
+
+ try lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ try lexer.step();
+ }
+ if (lexer.code_point == '\\') {
+ const scan_result = try lexer.scanIdentifierWithEscapes(.private);
+ lexer.identifier = scan_result.contents;
+ lexer.token = T.t_private_identifier;
+ } else {
+ lexer.token = T.t_private_identifier;
+ lexer.identifier = lexer.raw();
+ }
+ break;
}
- break;
}
},
'\r', '\n', 0x2028, 0x2029 => {
@@ -966,7 +1362,9 @@ pub const Lexer = struct {
}
if (lexer.code_point == '\\') {
- try lexer.scanIdentifierWithEscapes();
+ const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
+ lexer.identifier = scan_result.contents;
+ lexer.token = scan_result.token;
} else {
const contents = lexer.raw();
lexer.identifier = contents;
@@ -975,8 +1373,9 @@ pub const Lexer = struct {
},
'\\' => {
- // TODO: normal
- try lexer.scanIdentifierWithEscapes();
+ const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
+ lexer.identifier = scan_result.contents;
+ lexer.token = scan_result.token;
},
'.', '0'...'9' => {
@@ -996,8 +1395,9 @@ pub const Lexer = struct {
try lexer.step();
}
if (lexer.code_point == '\\') {
-
- // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
+ const scan_result = try lexer.scanIdentifierWithEscapes(.normal);
+ lexer.identifier = scan_result.contents;
+ lexer.token = scan_result.token;
} else {
lexer.token = T.t_identifier;
lexer.identifier = lexer.raw();
@@ -2143,26 +2543,114 @@ pub fn isIdentifierUTF16(text: JavascriptString) bool {
return true;
}
+pub const CodepointIterator = struct {
+ bytes: []const u8,
+ i: usize,
+ width: u3 = 0,
+ c: CodePoint = 0,
+
+ pub fn nextCodepointSlice(it: *CodepointIterator) ?[]const u8 {
+ if (it.i >= it.bytes.len) {
+ return null;
+ }
+
+ const cp_len = std
+ .unicode.utf8ByteSequenceLength(it.bytes[it.i]) catch unreachable;
+ it.i += cp_len;
+ return it.bytes[it.i - cp_len .. it.i];
+ }
+
+ pub fn nextCodepoint(it: *CodepointIterator) ?CodePoint {
+ const slice = it.nextCodepointSlice() orelse return null;
+ it.width = @intCast(u3, slice.len);
+
+ it.c = switch (it.width) {
+ 1 => @intCast(CodePoint, slice[0]),
+ 2 => @intCast(CodePoint, std.unicode.utf8Decode2(slice) catch unreachable),
+ 3 => @intCast(CodePoint, std.unicode.utf8Decode3(slice) catch unreachable),
+ 4 => @intCast(CodePoint, std.unicode.utf8Decode4(slice) catch unreachable),
+ else => unreachable,
+ };
+
+ return it.c;
+ }
+
+ /// Look ahead at the next n codepoints without advancing the iterator.
+ /// If fewer than n codepoints are available, then return the remainder of the string.
+ pub fn peek(it: *CodepointIterator, n: usize) []const u8 {
+ const original_i = it.i;
+ defer it.i = original_i;
+
+ var end_ix = original_i;
+ var found: usize = 0;
+ while (found < n) : (found += 1) {
+ const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..];
+ end_ix += next_codepoint.len;
+ }
+
+ return it.bytes[original_i..end_ix];
+ }
+};
+
// TODO: implement this to actually work right
// this fn is a stub!
pub fn rangeOfIdentifier(source: *const Source, loc: logger.Loc) logger.Range {
+ const text = source.contents[loc.toUsize()..];
var r = logger.Range{ .loc = loc, .len = 0 };
- const offset = @intCast(usize, loc.start);
- var i: usize = 0;
- for (source.contents[offset..]) |c| {
- if (isIdentifierStart(@as(CodePoint, c))) {
- for (source.contents[offset + i ..]) |c_| {
- if (!isIdentifierContinue(c_)) {
- r.len = std.math.lossyCast(i32, i);
- return r;
+ if (text.len == 0) {
+ return r;
+ }
+
+ var iter = CodepointIterator{ .bytes = text, .i = 0 };
+ var c = @intCast(CodePoint, iter.nextCodepoint() orelse unreachable);
+
+ // Handle private names
+ if (c == '#') {
+ c = @intCast(CodePoint, iter.nextCodepoint() orelse {
+ r.len = 1;
+ return r;
+ });
+ }
+
+ if (isIdentifierStart(c) or c == '\\') {
+ defer r.len = @intCast(i32, iter.i);
+ while (iter.nextCodepoint()) |code_point| {
+ if (code_point == '\\') {
+ // Search for the end of the identifier
+
+ // Skip over bracketed unicode escapes such as "\u{10000}"
+ if (iter.i + 2 < text.len and text[iter.i + 1] == 'u' and text[iter.i + 2] == '{') {
+ iter.i += 2;
+ while (iter.i < text.len) {
+ if (text[iter.i] == '}') {
+ iter.i += 1;
+ break;
+ }
+ iter.i += 1;
+ }
}
- i += 1;
+ } else if (!isIdentifierContinue(code_point)) {
+ return r;
}
}
-
- i += 1;
}
+ // const offset = @intCast(usize, loc.start);
+ // var i: usize = 0;
+ // for (text) |c| {
+ // if (isIdentifierStart(@as(CodePoint, c))) {
+ // for (source.contents[offset + i ..]) |c_| {
+ // if (!isIdentifierContinue(c_)) {
+ // r.len = std.math.lossyCast(i32, i);
+ // return r;
+ // }
+ // i += 1;
+ // }
+ // }
+
+ // i += 1;
+ // }
+
return r;
}
diff --git a/src/js_lexer_tables.zig b/src/js_lexer_tables.zig
index c6a5d4954..d373cb0b0 100644
--- a/src/js_lexer_tables.zig
+++ b/src/js_lexer_tables.zig
@@ -201,7 +201,7 @@ pub const StrictModeReservedWords = std.ComptimeStringMap(bool, .{
.{ "yield", true },
});
-pub const CodePoint = i22;
+pub const CodePoint = i32;
pub const PropertyModifierKeyword = enum {
p_abstract,
diff --git a/src/js_parser/js_parser.zig b/src/js_parser/js_parser.zig
index 7fb347c19..7a859a3a5 100644
--- a/src/js_parser/js_parser.zig
+++ b/src/js_parser/js_parser.zig
@@ -520,6 +520,155 @@ pub const SideEffects = enum {
}
}
+ pub fn simpifyUnusedExpr(p: *P, expr: Expr) ?Expr {
+ switch (expr.data) {
+ .e_null, .e_undefined, .e_missing, .e_boolean, .e_number, .e_big_int, .e_string, .e_this, .e_reg_exp, .e_function, .e_arrow, .e_import_meta => {
+ return null;
+ },
+
+ .e_dot => |dot| {
+ if (dot.can_be_removed_if_unused) {
+ return null;
+ }
+ },
+ .e_identifier => |ident| {
+ if (ident.must_keep_due_to_with_stmt) {
+ return expr;
+ }
+
+ if (ident.can_be_removed_if_unused or p.symbols.items[ident.ref.inner_index].kind != .unbound) {
+ return null;
+ }
+ },
+ .e_if => |__if__| {
+ __if__.yes = simpifyUnusedExpr(p, __if__.yes) orelse __if__.yes.toEmpty();
+ __if__.no = simpifyUnusedExpr(p, __if__.no) orelse __if__.no.toEmpty();
+
+ // "foo() ? 1 : 2" => "foo()"
+ if (__if__.yes.isEmpty() and __if__.no.isEmpty()) {
+ return simpifyUnusedExpr(p, __if__.test_);
+ }
+ },
+
+ .e_call => |call| {
+ // A call that has been marked "__PURE__" can be removed if all arguments
+ // can be removed. The annotation causes us to ignore the target.
+ if (call.can_be_unwrapped_if_unused) {
+ return Expr.joinAllWithComma(call.args, p.allocator);
+ }
+ },
+
+ .e_binary => |bin| {
+ switch (bin.op) {
+ // We can simplify "==" and "!=" even though they can call "toString" and/or
+ // "valueOf" if we can statically determine that the types of both sides are
+ // primitives. In that case there won't be any chance for user-defined
+ // "toString" and/or "valueOf" to be called.
+ .bin_loose_eq, .bin_loose_ne => {
+ if (isPrimitiveWithSideEffects(bin.left.data) and isPrimitiveWithSideEffects(bin.right.data)) {
+ return Expr.joinWithComma(simpifyUnusedExpr(p, bin.left) orelse bin.left.toEmpty(), simpifyUnusedExpr(p, bin.right) orelse bin.right.toEmpty(), p.allocator);
+ }
+ },
+ else => {},
+ }
+ },
+
+ .e_new => |call| {
+ // A constructor call that has been marked "__PURE__" can be removed if all arguments
+ // can be removed. The annotation causes us to ignore the target.
+ if (call.can_be_unwrapped_if_unused) {
+ return Expr.joinAllWithComma(call.args, p.allocator);
+ }
+ },
+ else => {},
+ }
+
+ return expr;
+ }
+
+ // If this is in a dead branch, then we want to trim as much dead code as we
+ // can. Everything can be trimmed except for hoisted declarations ("var" and
+ // "function"), which affect the parent scope. For example:
+ //
+ // function foo() {
+ // if (false) { var x; }
+ // x = 1;
+ // }
+ //
+ // We can't trim the entire branch as dead or calling foo() will incorrectly
+ // assign to a global variable instead.
+
+ // The main goal here is to trim conditionals
+ pub fn shouldKeepStmtInDeadControlFlow(stmt: Stmt) bool {
+ switch (stmt.data) {
+ .s_empty, .s_expr, .s_throw, .s_return, .s_break, .s_continue, .s_class, .s_debugger => {
+ // Omit these statements entirely
+ return false;
+ },
+
+ .s_local => |local| {
+ return local.kind != .k_var;
+ // if (local.kind != .k_var) {
+ // // Omit these statements entirely
+ // return false;
+ // }
+ },
+
+ .s_block => |block| {
+ for (block.stmts) |child| {
+ if (shouldKeepStmtInDeadControlFlow(child)) {
+ return true;
+ }
+ }
+
+ return false;
+ },
+
+ .s_if => |_if_| {
+ if (shouldKeepStmtInDeadControlFlow(_if_.yes)) {
+ return true;
+ }
+
+ const no = _if_.no orelse return false;
+
+ return shouldKeepStmtInDeadControlFlow(no);
+ },
+
+ .s_while => |__while__| {
+ return shouldKeepStmtInDeadControlFlow(__while__.body);
+ },
+
+ .s_do_while => |__while__| {
+ return shouldKeepStmtInDeadControlFlow(__while__.body);
+ },
+
+ .s_for => |__for__| {
+ if (__for__.init) |init_| {
+ if (shouldKeepStmtInDeadControlFlow(init_)) {
+ return true;
+ }
+ }
+
+ return shouldKeepStmtInDeadControlFlow(__for__.body);
+ },
+
+ .s_for_in => |__for__| {
+ return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body);
+ },
+
+ .s_for_of => |__for__| {
+ return shouldKeepStmtInDeadControlFlow(__for__.init) or shouldKeepStmtInDeadControlFlow(__for__.body);
+ },
+
+ .s_label => |label| {
+ return shouldKeepStmtInDeadControlFlow(label.stmt);
+ },
+ else => {
+ return true;
+ },
+ }
+ }
+
pub const Equality = struct { equal: bool = false, ok: bool = false };
// Returns "equal, ok". If "ok" is false, then nothing is known about the two
@@ -642,9 +791,10 @@ pub const SideEffects = enum {
.bin_comma => {
return isPrimitiveWithSideEffects(e.right.data);
},
+ else => {},
}
},
- .e_if => {
+ .e_if => |e| {
return isPrimitiveWithSideEffects(e.yes.data) and isPrimitiveWithSideEffects(e.no.data);
},
else => {},
@@ -1283,6 +1433,14 @@ pub const Parser = struct {
var result: js_ast.Result = undefined;
if (self.p) |p| {
+
+ // Consume a leading hashbang comment
+ var hashbang: string = "";
+ if (p.lexer.token == .t_hashbang) {
+ hashbang = p.lexer.identifier;
+ try p.lexer.next();
+ }
+
// Parse the file in the first pass, but do not bind symbols
var opts = ParseStatementOptions{ .is_module_scope = true };
debugl("<p.parseStmtsUpTo>");
@@ -1499,8 +1657,8 @@ const ParseStatementOptions = struct {
var e_missing_data = E.Missing{};
var s_missing = S.Empty{};
-var nullExprData = Expr.Data{ .e_missing = &e_missing_data };
-var nullStmtData = Stmt.Data{ .s_empty = &s_missing };
+var nullExprData = Expr.Data{ .e_missing = e_missing_data };
+var nullStmtData = Stmt.Data{ .s_empty = s_missing };
pub const Prefill = struct {
pub const StringLiteral = struct {
pub var Key = [3]u16{ 'k', 'e', 'y' };
@@ -1523,10 +1681,10 @@ pub const Prefill = struct {
pub var BMissing = B{ .b_missing = &BMissing_ };
pub var BMissing_ = B.Missing{};
- pub var EMissing = Expr.Data{ .e_missing = &EMissing_ };
+ pub var EMissing = Expr.Data{ .e_missing = EMissing_ };
pub var EMissing_ = E.Missing{};
- pub var SEmpty = Stmt.Data{ .s_empty = &SEmpty_ };
+ pub var SEmpty = Stmt.Data{ .s_empty = SEmpty_ };
pub var SEmpty_ = S.Empty{};
pub var Filename = Expr.Data{ .e_string = &Prefill.String.Filename };
@@ -4032,7 +4190,7 @@ pub const P = struct {
const name = p.lexer.identifier;
var emiss = E.Missing{};
// Parse either an async function, an async expression, or a normal expression
- var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = &emiss } };
+ var expr: Expr = Expr{ .loc = loc, .data = Expr.Data{ .e_missing = emiss } };
if (is_identifier and strings.eqlComptime(p.lexer.raw(), "async")) {
var async_range = p.lexer.range();
try p.lexer.next();
@@ -4589,7 +4747,7 @@ pub const P = struct {
const name = p.lexer.identifier;
const loc = p.lexer.loc();
- const e_str = p.lexer.toEString();
+ const e_str = E.String{ .utf8 = name };
if (!p.lexer.isIdentifierOrKeyword()) {
try p.lexer.expect(.t_identifier);
@@ -7262,7 +7420,7 @@ pub const P = struct {
}
return p.e(E.Array{
.items = items.toOwnedSlice(),
- .comma_after_spread = comma_after_spread,
+ .comma_after_spread = comma_after_spread.toNullable(),
.is_single_line = is_single_line,
}, loc);
},
@@ -7325,7 +7483,7 @@ pub const P = struct {
}
return p.e(E.Object{
.properties = properties.toOwnedSlice(),
- .comma_after_spread = comma_after_spread,
+ .comma_after_spread = comma_after_spread.toNullable(),
.is_single_line = is_single_line,
}, loc);
},
@@ -9707,11 +9865,8 @@ pub const P = struct {
.s_expr => |data| {
p.stmt_expr_value = data.value.data;
data.value = p.visitExpr(data.value);
-
- // TODO:
- // if (p.options.mangle_syntax) {
-
- // }
+ // simplify unused
+ data.value = SideEffects.simpifyUnusedExpr(p, data.value) orelse data.value.toEmpty();
},
.s_throw => |data| {
data.value = p.visitExpr(data.value);
@@ -10622,9 +10777,10 @@ pub const P = struct {
// Save the current control-flow liveness. This represents if we are
// currently inside an "if (false) { ... }" block.
var old_is_control_flow_dead = p.is_control_flow_dead;
+ defer p.is_control_flow_dead = old_is_control_flow_dead;
// visit all statements first
- var visited = List(Stmt).init(p.allocator);
+ var visited = try List(Stmt).initCapacity(p.allocator, stmts.items.len);
var before = List(Stmt).init(p.allocator);
var after = List(Stmt).init(p.allocator);
defer before.deinit();
@@ -10657,8 +10813,21 @@ pub const P = struct {
try p.visitAndAppendStmt(list, stmt);
}
- p.is_control_flow_dead = old_is_control_flow_dead;
- try stmts.resize(visited.items.len + before.items.len + after.items.len);
+ var visited_count = visited.items.len;
+ if (p.is_control_flow_dead) {
+ var end: usize = 0;
+ for (visited.items) |item, i| {
+ if (!SideEffects.shouldKeepStmtInDeadControlFlow(item)) {
+ continue;
+ }
+
+ visited.items[end] = item;
+ end += 1;
+ }
+ visited_count = end;
+ }
+
+ try stmts.resize(visited_count + before.items.len + after.items.len);
var i: usize = 0;
for (before.items) |item| {
@@ -10666,7 +10835,8 @@ pub const P = struct {
i += 1;
}
- for (visited.items) |item| {
+ const visited_slice = visited.items[0..visited_count];
+ for (visited_slice) |item| {
stmts.items[i] = item;
i += 1;
}
diff --git a/src/js_printer.zig b/src/js_printer.zig
index e72eefbde..8170ff9ad 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -199,24 +199,54 @@ pub fn NewPrinter(comptime ascii_only: bool) type {
p.js.appendChar(str) catch unreachable;
},
string => {
+ if (isDebug or isTest) {
+ if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.append(str) catch unreachable;
},
u8 => {
+ if (isDebug or isTest) {
+ if (str == 0) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.appendChar(str) catch unreachable;
},
u16 => {
+ if (isDebug or isTest) {
+ if (str == 0) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.appendChar(@intCast(u8, str)) catch unreachable;
},
u21 => {
+ if (isDebug or isTest) {
+ if (str == 0) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.appendChar(@intCast(u8, str)) catch unreachable;
},
else => {
+ if (isDebug or isTest) {
+ if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.append(@as(string, str)) catch unreachable;
},
}
}
pub fn unsafePrint(p: *Printer, str: string) void {
+ if (isDebug or isTest) {
+ if (str[0] == 0 or (str[0] == '\\' and str[1] == '0')) {
+ Global.panic("Attempted to print null char", .{});
+ }
+ }
p.js.appendAssumeCapacity(str);
}
diff --git a/src/logger.zig b/src/logger.zig
index 2584566eb..8d282ee13 100644
--- a/src/logger.zig
+++ b/src/logger.zig
@@ -30,8 +30,12 @@ pub const Kind = enum {
pub const Loc = packed struct {
start: i32 = -1,
+ pub fn toNullable(loc: *Loc) ?Loc {
+ return if (loc.start == -1) null else loc.*;
+ }
+
// TODO: remove this stupidity
- pub fn toUsize(self: *Loc) usize {
+ pub fn toUsize(self: *const Loc) usize {
return @intCast(usize, self.start);
}