aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/fs.zig28
-rw-r--r--src/js_lexer.zig343
-rw-r--r--src/js_lexer_tables.zig4
-rw-r--r--src/logger.zig195
4 files changed, 524 insertions, 46 deletions
diff --git a/src/fs.zig b/src/fs.zig
index c6e353d9b..6b4d390cc 100644
--- a/src/fs.zig
+++ b/src/fs.zig
@@ -1,5 +1,6 @@
const std = @import("std");
const strings = @import("strings.zig");
+const alloc = @import("alloc.zig");
const expect = std.testing.expect;
pub const FileSystem = struct { tree: std.AutoHashMap(FileSystemEntry) };
@@ -17,11 +18,14 @@ pub const PathName = struct {
dir: []u8,
ext: []u8,
- pub fn init(_path: []u8) PathName {
- var path = _path;
- var base: []u8 = path;
- var dir: []u8 = path;
- var ext: []u8 = path;
+ pub fn init(_path: []const u8, allocator: *std.mem.Allocator) PathName {
+ // TODO: leak.
+ var path: []u8 = allocator.alloc(u8, _path.len) catch unreachable;
+ std.mem.copy(u8, path, _path);
+
+ var base = path;
+ var dir = path;
+ var ext = path;
var _i = strings.lastIndexOfChar(path, '/');
while (_i) |i| {
@@ -54,10 +58,14 @@ pub const PathName = struct {
};
pub const Path = struct {
- pretty_path: []u8,
- text: []u8,
- namespace: []u8,
- path_disabled: []u8,
+ pretty_path: []const u8,
+ text: []const u8,
+ namespace: []const u8,
+ name: PathName,
+
+ pub fn init(text: []const u8, allocator: *std.mem.Allocator) Path {
+ return Path{ .pretty_path = text, .text = text, .namespace = "file", .name = PathName.init(text, allocator) };
+ }
pub fn isBefore(a: *Path, b: Path) bool {
return a.namespace > b.namespace ||
@@ -69,7 +77,7 @@ pub const Path = struct {
test "PathName.init" {
var file = "/root/directory/file.ext".*;
- const res = PathName.init(&file);
+ const res = PathName.init(&file, std.heap.page_allocator);
std.testing.expectEqualStrings(res.dir, "/root/directory");
std.testing.expectEqualStrings(res.base, "file");
std.testing.expectEqualStrings(res.ext, ".ext");
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 41c5a084e..4901d8153 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -10,7 +10,15 @@ pub const Keywords = tables.Keywords;
pub const tokenToString = tables.tokenToString;
pub const jsxEntity = tables.jsxEntity;
+const string = []const u8;
+
pub const Lexer = struct {
+ // pub const Error = error{
+ // UnexpectedToken,
+ // EndOfFile,
+ // };
+
+ // err: ?Lexer.Error,
log: logger.Log,
source: logger.Source,
current: usize = 0,
@@ -26,9 +34,9 @@ pub const Lexer = struct {
is_legacy_octal_literal: bool = false,
// comments_to_preserve_before: []js_ast.Comment,
// all_original_comments: []js_ast.Comment,
- code_point: CodePoint = 0,
- string_literal: []u16,
- identifier: []u8 = "",
+ code_point: CodePoint = -1,
+ string_literal: std.ArrayList([]u16),
+ identifier: []const u8 = "",
// jsx_factory_pragma_comment: js_ast.Span,
// jsx_fragment_pragma_comment: js_ast.Span,
// source_mapping_url: js_ast.Span,
@@ -48,12 +56,41 @@ pub const Lexer = struct {
return it.source.contents[it.current - cp_len .. it.current];
}
- pub fn addError(self: *Lexer, loc: logger.Loc, text: []u8) void {
- if (loc == self.prevErrorLoc) {
+ pub fn syntax_error(self: *Lexer) void {
+ self.addError(self.start, "Syntax Error!!", .{}, true);
+ }
+
+ pub fn addError(self: *Lexer, _loc: usize, comptime format: []const u8, args: anytype, panic: bool) void {
+ const loc = logger.usize2Loc(_loc);
+ if (loc == self.prev_error_loc) {
+ return;
+ }
+
+ const errorMessage = std.fmt.allocPrint(self.string_literal.allocator, format, args) catch unreachable;
+ self.log.addError(self.source, loc, errorMessage) catch unreachable;
+ self.prev_error_loc = loc;
+
+ if (panic) {
+ self.doPanic(errorMessage);
+ }
+ }
+
+ pub fn addRangeError(self: *Lexer, range: logger.Range, comptime format: []const u8, args: anytype, panic: bool) void {
+        if (range.loc == self.prev_error_loc) {
            return;
        }
+        const errorMessage = std.fmt.allocPrint(self.string_literal.allocator, format, args) catch unreachable;
+        self.log.addRangeError(self.source, range, errorMessage);
        self.prev_error_loc = range.loc;
+
+ if (panic) {
+ self.doPanic(errorMessage);
+ }
+ }
+
+ fn doPanic(self: *Lexer, content: []const u8) void {
+ std.debug.panic("{s}", .{content});
}
pub fn codePointEql(self: *Lexer, a: u8) bool {
@@ -61,7 +98,7 @@ pub const Lexer = struct {
}
fn nextCodepoint(it: *Lexer) callconv(.Inline) CodePoint {
- const slice = it.nextCodepointSlice() orelse return @as(CodePoint, 0);
+ const slice = it.nextCodepointSlice() orelse return @as(CodePoint, -1);
switch (slice.len) {
1 => return @as(CodePoint, slice[0]),
@@ -118,39 +155,315 @@ pub const Lexer = struct {
}
}
- pub fn next(self: *Lexer) void {}
+ pub fn addUnsupportedSyntaxError(self: *Lexer, msg: []const u8) void {
+ self.addError(self.end, "Unsupported syntax: {s}", .{msg}, true);
+ }
+
+ pub fn scanIdentifierWithEscapes(self: *Lexer) void {
+ self.addUnsupportedSyntaxError("escape sequence");
+ return;
+ }
+
+ pub fn next(lexer: *Lexer) void {
+ lexer.has_newline_before = lexer.end == 0;
+
+ while (true) {
+ lexer.start = lexer.end;
+ lexer.token = T.t_end_of_file;
+
+ switch (lexer.code_point) {
+ -1 => {
+ lexer.token = T.t_end_of_file;
+ },
+
+ '#' => {
+                    if (lexer.start == 0 and lexer.source.contents.len > 1 and lexer.source.contents[1] == '!') {
+ lexer.addUnsupportedSyntaxError("#!hashbang is not supported yet.");
+ return;
+ }
+
+ lexer.step();
+ if (!isIdentifierStart(lexer.code_point)) {
+ lexer.syntax_error();
+ }
+ lexer.step();
+
+ if (isIdentifierStart(lexer.code_point)) {
+ lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
+ }
+ if (lexer.code_point == '\\') {
+ lexer.scanIdentifierWithEscapes();
+ lexer.token = T.t_private_identifier;
+ // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
+ } else {
+ lexer.token = T.t_private_identifier;
+ lexer.identifier = lexer.raw();
+ }
+ break;
+ }
+ },
+ '\r', '\n', 0x2028, 0x2029 => {
+ lexer.step();
+ lexer.has_newline_before = true;
+ continue;
+ },
+
+ '\t', ' ' => {
+ lexer.step();
+ continue;
+ },
+
+ '(' => {
+ lexer.step();
+ lexer.token = T.t_open_paren;
+ },
+ ')' => {
+ lexer.step();
+ lexer.token = T.t_close_paren;
+ },
+ '[' => {
+ lexer.step();
+ lexer.token = T.t_open_bracket;
+ },
+ ']' => {
+ lexer.step();
+ lexer.token = T.t_close_bracket;
+ },
+ '{' => {
+ lexer.step();
+ lexer.token = T.t_open_brace;
+ },
+ '}' => {
+ lexer.step();
+ lexer.token = T.t_close_brace;
+ },
+ ',' => {
+ lexer.step();
+ lexer.token = T.t_comma;
+ },
+ ':' => {
+ lexer.step();
+ lexer.token = T.t_colon;
+ },
+ ';' => {
+ lexer.step();
+ lexer.token = T.t_semicolon;
+ },
+ '@' => {
+ lexer.step();
+ lexer.token = T.t_at;
+ },
+ '~' => {
+ lexer.step();
+ lexer.token = T.t_tilde;
+ },
+
+ '?' => {
+ // '?' or '?.' or '??' or '??='
+ lexer.step();
+ switch (lexer.code_point) {
+ '?' => {
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_question_question_equals;
+ },
+ else => {
+ lexer.token = T.t_question_question;
+ },
+ }
+ },
+
+ '.' => {
+ lexer.token = T.t_question;
+ const current = lexer.current;
+ const contents = lexer.source.contents;
+
+ // Lookahead to disambiguate with 'a?.1:b'
+ if (current < contents.len) {
+ const c = contents[current];
+ if (c < '0' or c > '9') {
+ lexer.step();
+ lexer.token = T.t_question_dot;
+ }
+ }
+ },
+ else => {
+ lexer.token = T.t_question;
+ },
+ }
+ },
+
+ '%' => {
+ // '%' or '%='
+ lexer.step();
+ switch (lexer.code_point) {
+ '=' => {
+ lexer.step();
+ lexer.token = T.t_percent_equals;
+ },
+
+ else => {
+ lexer.token = T.t_percent;
+ },
+ }
+ },
- pub fn init(log: logger.Log, source: logger.Source) Lexer {
- var string_literal = [1]u16{0};
+ else => {
+ // Check for unusual whitespace characters
+ if (isWhitespace(lexer.code_point)) {
+ lexer.step();
+ continue;
+ }
+ if (isIdentifierStart(lexer.code_point)) {
+ lexer.step();
+ while (isIdentifierContinue(lexer.code_point)) {
+ lexer.step();
+ }
+ if (lexer.code_point == '\\') {
+
+ // lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier);
+ } else {
+ lexer.token = T.t_identifier;
+ lexer.identifier = lexer.raw();
+ }
+ break;
+ }
+
+ lexer.end = lexer.current;
+ lexer.token = T.t_syntax_error;
+ },
+ }
+ }
+ }
+
+ pub fn expected(self: *Lexer, token: T) void {
+        const text = tokenToString.get(token);
+        if (text.len > 0) {
+            self.expectedString(text);
+        } else {
+            self.unexpected();
+        }
+ }
+
+ pub fn raw(self: *Lexer) []const u8 {
+ return self.source.contents[self.start..self.end];
+ }
+
+ pub fn expectedString(self: *Lexer, text: string) void {
+ var found = text;
+ if (self.source.contents.len == self.start) {
+ found = "end of file";
+ }
+        self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true);
+ }
+
+ pub fn range(self: *Lexer) logger.Range {
+ return logger.Range{
+            .loc = logger.usize2Loc(self.start),
+            .len = logger.usize2Loc(self.end - self.start),
+ };
+ }
+
+ pub fn init(log: logger.Log, source: logger.Source, allocator: *std.mem.Allocator) !Lexer {
var lex = Lexer{
.log = log,
.source = source,
- .string_literal = &string_literal,
+ .string_literal = try std.ArrayList([]u16).initCapacity(allocator, 16),
.prev_error_loc = -1,
};
lex.step();
- lex.next();
+ // lex.next();
return lex;
}
};
+fn isIdentifierStart(codepoint: CodePoint) bool {
+ switch (codepoint) {
+ 'a'...'z', 'A'...'Z', '_', '$' => {
+ return true;
+ },
+ else => {
+ return false;
+ },
+ }
+}
+fn isIdentifierContinue(codepoint: CodePoint) bool {
+ switch (codepoint) {
+ '_', '$', '0'...'9', 'a'...'z', 'A'...'Z' => {
+ return true;
+ },
+ else => {},
+ }
+
+ // All ASCII identifier start code points are listed above
+ if (codepoint < 0x7F) {
+ return false;
+ }
+
+ // ZWNJ and ZWJ are allowed in identifiers
+ if (codepoint == 0x200C or codepoint == 0x200D) {
+ return true;
+ }
+
+ return false;
+}
+
+fn isWhitespace(codepoint: CodePoint) bool {
+ switch (codepoint) {
+ 0x000B, // line tabulation
+ 0x0009, // character tabulation
+ 0x000C, // form feed
+ 0x0020, // space
+ 0x00A0, // no-break space
+ // Unicode "Space_Separator" code points
+ 0x1680, // ogham space mark
+ 0x2000, // en quad
+ 0x2001, // em quad
+ 0x2002, // en space
+ 0x2003, // em space
+ 0x2004, // three-per-em space
+ 0x2005, // four-per-em space
+ 0x2006, // six-per-em space
+ 0x2007, // figure space
+ 0x2008, // punctuation space
+ 0x2009, // thin space
+ 0x200A, // hair space
+ 0x202F, // narrow no-break space
+ 0x205F, // medium mathematical space
+ 0x3000, // ideographic space
+ 0xFEFF,
+ => {
+ return true;
+ }, // zero width non-breaking space
+ else => {
+ return false;
+ },
+ }
+}
+
test "Lexer.step()" {
const msgs = std.ArrayList(logger.Msg).init(std.testing.allocator);
const log = logger.Log{
.msgs = msgs,
};
- var sourcefile = "for (let i = 0; i < 100; i++) { console.log('hi'); }".*;
- var identifier_name = "loop".*;
defer std.testing.allocator.free(msgs.items);
- const source = logger.Source{ .index = 0, .contents = &sourcefile, .identifier_name = &identifier_name };
+ const source = logger.Source.initPathString("index.js", "for (let i = 0; i < 100; i++) { console.log('hi'); }", std.heap.page_allocator);
- var lex = Lexer.init(log, source);
+ var lex = try Lexer.init(log, source, std.testing.allocator);
+    defer lex.string_literal.deinit();
std.testing.expect('f' == lex.code_point);
lex.step();
std.testing.expect('o' == lex.code_point);
lex.step();
std.testing.expect('r' == lex.code_point);
+ while (lex.current < source.contents.len) {
+ std.testing.expect(lex.code_point == source.contents[lex.current - 1]);
+ lex.step();
+ }
}
diff --git a/src/js_lexer_tables.zig b/src/js_lexer_tables.zig
index 916323f4f..ab6b0f95a 100644
--- a/src/js_lexer_tables.zig
+++ b/src/js_lexer_tables.zig
@@ -178,11 +178,11 @@ pub const Keywords = std.ComptimeStringMap(T, .{
.{ "with", .t_with },
});
-pub const CodePoint = u21;
+pub const CodePoint = i22;
pub const TokenEnumType = std.EnumArray(T, []u8);
-pub const tokenToString: TokenEnumType = comptime {
+pub const tokenToString = comptime {
var TEndOfFile = "end of file".*;
var TSyntaxError = "syntax error".*;
var THashbang = "hashbang comment".*;
diff --git a/src/logger.zig b/src/logger.zig
index 617eb4434..8180fd04b 100644
--- a/src/logger.zig
+++ b/src/logger.zig
@@ -1,5 +1,7 @@
const std = @import("std");
const strings = @import("strings.zig");
+const fs = @import("fs.zig");
+const unicode = std.unicode;
const expect = std.testing.expect;
const assert = std.debug.assert;
@@ -24,13 +26,13 @@ pub const Kind = enum {
pub const Loc = i32;
pub const Location = struct {
- file: []u8,
- namespace: []u8 = "file",
+ file: []const u8,
+ namespace: []const u8 = "file",
line: i32 = 1, // 1-based
column: i32 = 0, // 0-based, in bytes
- length: u32 = 0, // in bytes
- line_text: ?[]u8,
- suggestion: ?[]u8,
+ length: usize = 0, // in bytes
+ line_text: ?[]const u8 = null,
+ suggestion: ?[]const u8 = null,
pub fn init(file: []u8, namespace: []u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location {
return Location{
@@ -44,7 +46,23 @@ pub const Location = struct {
};
}
- pub fn init_file(file: []u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location {
+ pub fn init_or_nil(_source: ?Source, r: Range) ?Location {
+ if (_source) |source| {
+ var data = source.initErrorPosition(r.loc);
+ return Location{
+ .file = source.path.pretty_path,
+ .namespace = source.path.namespace,
+ .line = usize2Loc(data.line_count),
+ .column = usize2Loc(data.column_count),
+                .length = @intCast(usize, std.math.max(0, r.len)),
+ .line_text = source.contents[data.line_start..data.line_end],
+ };
+ } else {
+ return null;
+ }
+ }
+
+ pub fn init_file(file: []const u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location {
var namespace = "file".*;
return Location{
@@ -59,14 +77,14 @@ pub const Location = struct {
}
};
-pub const Data = struct { text: []u8, location: *Location };
+pub const Data = struct { text: []u8, location: ?Location = null };
pub const Msg = struct {
    kind: Kind = Kind.err,
    data: Data,
+    notes: ?[]Data = null,
};
-pub const Range = struct { start: u32 = 0, len: i32 = 0 };
+pub const Range = struct { loc: Loc = 0, len: i32 = 0 };
pub const Log = struct {
debug: bool = false,
@@ -74,30 +92,169 @@ pub const Log = struct {
errors: u8 = 0,
msgs: ArrayList(Msg),
+ pub fn addVerbose(log: *Log, source: ?Source, loc: Loc, text: []u8) void {
+ log.addMsg(Msg{
+ .kind = .verbose,
+            .data = rangeData(source, Range{ .loc = loc }, text),
+ });
+ }
+
+    pub fn addVerboseWithNotes(log: *Log, source: ?Source, loc: Loc, text: []u8, notes: []Data) void {
+ log.addMsg(Msg{
+ .kind = .verbose,
+ .data = rangeData(source, Range{ .loc = loc }, text),
+ .notes = notes,
+ });
+ }
+
+ pub fn addRangeError(log: *Log, source: ?Source, r: Range, text: []u8) void {
+ log.addMsg(Msg{
+            .kind = Kind.err,
+ .data = rangeData(source, r, text),
+ });
+ }
+
+ pub fn addRangeWarning(log: *Log, source: ?Source, r: Range, text: []u8) void {
+ log.addMsg(Msg{
+ .kind = .warning,
+ .data = rangeData(source, r, text),
+ });
+ }
+
+ pub fn addRangeDebug(log: *Log, source: ?Source, r: Range, text: []u8) void {
+ log.addMsg(Msg{
+ .kind = .debug,
+ .data = rangeData(source, r, text),
+ });
+ }
+
+ pub fn addRangeErrorWithNotes(log: *Log, source: ?Source, r: Range, text: []u8, notes: []Data) void {
+ log.addMsg(Msg{
+ .kind = Kind.err,
+ .data = rangeData(source, r, text),
+ .notes = notes,
+ });
+ }
+
+ pub fn addRangeWarningWithNotes(log: *Log, source: ?Source, r: Range, text: []u8, notes: []Data) void {
+ log.addMsg(Msg{
+ .kind = .warning,
+ .data = rangeData(source, r, text),
+ .notes = notes,
+ });
+ }
+
// TODO:
- pub fn add_msg(self: *Log, msg: Msg) !void {
+ pub fn addMsg(self: *Log, msg: Msg) !void {
try self.msgs.append(msg);
}
// TODO:
- pub fn add_err(self: *Log, msg: Msg) !void {
- // try self.msgs.append(msg);
+ pub fn addError(self: *Log, _source: ?Source, loc: Loc, text: []u8) !void {
+ try self.addMsg(Msg{ .kind = .err, .data = rangeData(_source, Range{ .loc = loc }, text) });
+ self.errors += 1;
}
// TODO:
pub fn print(self: *Log, to: anytype) !void {
for (self.msgs.items) |msg| {
- try std.fmt.format(to, "\n\n{s}: {s}\n{s}\n{s}:{}:{}", .{ msg.kind.string(), msg.data.text, msg.data.location.line_text, msg.data.location.file, msg.data.location.line, msg.data.location.column });
+ try std.fmt.format(to, "\n\n{s}: {s}\n{s}\n{s}:{}:{}", .{ msg.kind.string(), msg.data.text, msg.data.location.?.line_text, msg.data.location.?.file, msg.data.location.?.line, msg.data.location.?.column });
}
}
};
-pub const Source = struct { index: u32 = 0, contents: []u8,
+pub fn usize2Loc(loc: usize) Loc {
+ if (loc > std.math.maxInt(Loc)) {
+ return 9999;
+ } else {
+ return @intCast(Loc, loc);
+ }
+}
+
+pub const Source = struct {
+ path: fs.Path,
+ index: u32 = 0,
+ contents: []const u8,
+
+ // An identifier that is mixed in to automatically-generated symbol names to
+ // improve readability. For example, if the identifier is "util" then the
+ // symbol for an "export default" statement will be called "util_default".
+ identifier_name: []u8,
+
+ pub const ErrorPosition = struct { line_start: usize, line_end: usize, column_count: usize, line_count: usize };
-// An identifier that is mixed in to automatically-generated symbol names to
-// improve readability. For example, if the identifier is "util" then the
-// symbol for an "export default" statement will be called "util_default".
-identifier_name: []u8 };
+ pub fn initPathString(pathString: []const u8, contents: []const u8, allocator: *std.mem.Allocator) Source {
+ const path = fs.Path.init(pathString, allocator);
+ return Source{ .path = path, .identifier_name = path.name.base, .contents = contents };
+ }
+
+ pub fn initErrorPosition(self: *const Source, _offset: Loc) ErrorPosition {
+ var prev_code_point: u21 = 0;
+ var offset: usize = if (_offset < 0) 0 else @intCast(usize, _offset);
+
+ const contents = self.contents;
+
+ var iter = unicode.Utf8Iterator{
+ .bytes = self.contents[0..offset],
+ .i = std.math.min(offset, self.contents.len),
+ };
+
+ var line_start: usize = 0;
+ var line_count: usize = 0;
+
+ while (iter.nextCodepoint()) |code_point| {
+ switch (code_point) {
+ '\n' => {
+ line_start = iter.i + 1;
+ if (prev_code_point != '\r') {
+ line_count += 1;
+ }
+ },
+
+ '\r' => {
+ line_start = iter.i + 1;
+ line_count += 1;
+ },
+
+ 0x2028, 0x2029 => {
+ line_start = iter.i + 3; // These take three bytes to encode in UTF-8
+ line_count += 1;
+ },
+ else => {},
+ }
+
+ prev_code_point = code_point;
+ }
+
+ iter = unicode.Utf8Iterator{
+ .bytes = self.contents[offset..],
+ .i = std.math.min(offset, self.contents.len),
+ };
+
+ // Scan to the end of the line (or end of file if this is the last line)
+ var line_end: usize = contents.len;
+
+ loop: while (iter.nextCodepoint()) |code_point| {
+ switch (code_point) {
+ '\r', '\n', 0x2028, 0x2029 => {
+ line_end = offset + iter.i;
+ break :loop;
+ },
+ else => {},
+ }
+ }
+ return ErrorPosition{
+ .line_start = line_start,
+ .line_end = line_end,
+ .line_count = line_count,
+ .column_count = offset - line_start,
+ };
+ }
+};
+
+pub fn rangeData(source: ?Source, r: Range, text: []u8) Data {
+ return Data{ .text = text, .location = Location.init_or_nil(source, r) };
+}
test "print msg" {
var log = Log{ .msgs = ArrayList(Msg).init(std.testing.allocator) };
@@ -107,9 +264,9 @@ test "print msg" {
var err = "invalid syntax".*;
var namespace = "file".*;
- try log.add_msg(Msg{
+ try log.addMsg(Msg{
.kind = .err,
- .data = Data{ .location = &Location.init_file(&filename, 1, 3, 0, &syntax, ""), .text = &err },
+ .data = Data{ .location = Location.init_file(&filename, 1, 3, 0, &syntax, ""), .text = &err },
});
const stdout = std.io.getStdOut().writer();