diff options
author | 2022-01-28 23:56:45 -0800 | |
---|---|---|
committer | 2022-01-28 23:56:45 -0800 | |
commit | bf4943eec152ca1617db9c004f19fa5c27aa4b82 (patch) | |
tree | a59d793b36a9c28c2ccf7f74c61f48a5502bbcc1 /src | |
parent | 97d17904d3f6b850e8973b84d6b4ad5e22afb941 (diff) | |
download | bun-bf4943eec152ca1617db9c004f19fa5c27aa4b82.tar.gz bun-bf4943eec152ca1617db9c004f19fa5c27aa4b82.tar.zst bun-bf4943eec152ca1617db9c004f19fa5c27aa4b82.zip |
Implement TOML parser
No Date/DateTime/Time/Local Time yet
Diffstat (limited to 'src')
-rw-r--r-- | src/api/schema.d.ts | 3 | ||||
-rw-r--r-- | src/api/schema.js | 4 | ||||
-rw-r--r-- | src/api/schema.peechy | 1 | ||||
-rw-r--r-- | src/api/schema.zig | 3 | ||||
-rw-r--r-- | src/bundler.zig | 24 | ||||
-rw-r--r-- | src/bunfig.zig | 9 | ||||
-rw-r--r-- | src/cli.zig | 4 | ||||
-rw-r--r-- | src/fallback.version | 2 | ||||
-rw-r--r-- | src/http.zig | 4 | ||||
-rw-r--r-- | src/js_ast.zig | 323 | ||||
-rw-r--r-- | src/js_lexer.zig | 1 | ||||
-rw-r--r-- | src/options.zig | 23 | ||||
-rw-r--r-- | src/toml/toml_lexer.zig | 1211 | ||||
-rw-r--r-- | src/toml/toml_parser.zig | 396 |
14 files changed, 1990 insertions, 18 deletions
diff --git a/src/api/schema.d.ts b/src/api/schema.d.ts index f0437d19c..015f6af40 100644 --- a/src/api/schema.d.ts +++ b/src/api/schema.d.ts @@ -20,6 +20,7 @@ export enum Loader { css = 5, file = 6, json = 7, + toml = 8, } export const LoaderKeys = { 1: "jsx", @@ -36,6 +37,8 @@ export const LoaderKeys = { file: "file", 7: "json", json: "json", + 8: "toml", + toml: "toml", }; export enum FrameworkEntryPointType { client = 1, diff --git a/src/api/schema.js b/src/api/schema.js index 0405b4457..da91af252 100644 --- a/src/api/schema.js +++ b/src/api/schema.js @@ -6,6 +6,7 @@ const Loader = { 5: 5, 6: 6, 7: 7, + 8: 8, jsx: 1, js: 2, ts: 3, @@ -13,6 +14,7 @@ const Loader = { css: 5, file: 6, json: 7, + toml: 8, }; const LoaderKeys = { 1: "jsx", @@ -22,6 +24,7 @@ const LoaderKeys = { 5: "css", 6: "file", 7: "json", + 8: "toml", jsx: "jsx", js: "js", ts: "ts", @@ -29,6 +32,7 @@ const LoaderKeys = { css: "css", file: "file", json: "json", + toml: "toml", }; const FrameworkEntryPointType = { 1: 1, diff --git a/src/api/schema.peechy b/src/api/schema.peechy index 5dea057a9..58ca3f013 100644 --- a/src/api/schema.peechy +++ b/src/api/schema.peechy @@ -8,6 +8,7 @@ smol Loader { css = 5; file = 6; json = 7; + toml = 8; } smol FrameworkEntryPointType { diff --git a/src/api/schema.zig b/src/api/schema.zig index 4bca9cd1f..8f21c350c 100644 --- a/src/api/schema.zig +++ b/src/api/schema.zig @@ -353,6 +353,9 @@ pub const Api = struct { /// json json, + /// toml + toml, + _, pub fn jsonStringify(self: *const @This(), opts: anytype, o: anytype) !void { diff --git a/src/bundler.zig b/src/bundler.zig index f453f7b89..8891b3eb6 100644 --- a/src/bundler.zig +++ b/src/bundler.zig @@ -53,6 +53,7 @@ const URL = @import("./query_string_map.zig").URL; const Report = @import("./report.zig"); const Linker = linker.Linker; const Resolver = _resolver.Resolver; +const TOML = @import("./toml/toml_parser.zig").TOML; const EntryPoints = @import("./bundler/entry_points.zig"); pub usingnamespace EntryPoints; 
@@ -2429,7 +2430,7 @@ pub const Bundler = struct { } switch (loader) { - .jsx, .tsx, .js, .ts, .json => { + .jsx, .tsx, .js, .ts, .json, .toml => { var result = bundler.parse( ParseOptions{ .allocator = bundler.allocator, @@ -2777,6 +2778,24 @@ pub const Bundler = struct { .input_fd = input_fd, }; }, + .toml => { + var expr = TOML.parse(&source, bundler.log, allocator) catch return null; + var stmt = js_ast.Stmt.alloc(js_ast.S.ExportDefault, js_ast.S.ExportDefault{ + .value = js_ast.StmtOrExpr{ .expr = expr }, + .default_name = js_ast.LocRef{ .loc = logger.Loc{}, .ref = Ref{} }, + }, logger.Loc{ .start = 0 }); + var stmts = allocator.alloc(js_ast.Stmt, 1) catch unreachable; + stmts[0] = stmt; + var parts = allocator.alloc(js_ast.Part, 1) catch unreachable; + parts[0] = js_ast.Part{ .stmts = stmts }; + + return ParseResult{ + .ast = js_ast.Ast.initTest(parts), + .source = source, + .loader = loader, + .input_fd = input_fd, + }; + }, .css => {}, else => Global.panic("Unsupported loader {s} for path: {s}", .{ loader, source.path.text }), } @@ -2870,7 +2889,7 @@ pub const Bundler = struct { ), }; }, - .json => { + .toml, .json => { return ServeResult{ .file = options.OutputFile.initPending(loader, resolved), .mime_type = MimeType.transpiled_json, @@ -3387,6 +3406,7 @@ pub const Transformer = struct { ast = js_ast.Ast.initTest(parts); }, + .jsx, .tsx, .ts, .js => { var parser = try js_parser.Parser.init(opts, log, source, define, allocator); var res = try parser.parse(); diff --git a/src/bunfig.zig b/src/bunfig.zig index 9af59976f..c22834006 100644 --- a/src/bunfig.zig +++ b/src/bunfig.zig @@ -28,6 +28,7 @@ const LoaderMap = std.StringArrayHashMapUnmanaged(options.Loader); const Analytics = @import("./analytics.zig"); const JSONParser = @import("./json_parser.zig"); const Command = @import("cli.zig").Command; +const TOML = @import("./toml/toml_parser.zig").TOML; // TODO: replace Api.TransformOptions with Bunfig pub const Bunfig = struct { @@ -262,7 +263,13 @@ pub const 
Bunfig = struct { pub fn parse(allocator: std.mem.Allocator, source: logger.Source, ctx: *Command.Context, comptime cmd: Command.Tag) !void { const log_count = ctx.log.errors + ctx.log.warnings; - var expr = JSONParser.ParseTSConfig(&source, ctx.log, allocator) catch |err| { + + var expr = if (strings.eqlComptime(source.path.name.ext[1..], "toml")) TOML.parse(&source, ctx.log, allocator) catch |err| { + if (ctx.log.errors + ctx.log.warnings == log_count) { + ctx.log.addErrorFmt(&source, logger.Loc.Empty, allocator, "Failed to parse", .{}) catch unreachable; + } + return err; + } else JSONParser.ParseTSConfig(&source, ctx.log, allocator) catch |err| { if (ctx.log.errors + ctx.log.warnings == log_count) { ctx.log.addErrorFmt(&source, logger.Loc.Empty, allocator, "Failed to parse", .{}) catch unreachable; } diff --git a/src/cli.zig b/src/cli.zig index a223324a8..16ed1f5d1 100644 --- a/src/cli.zig +++ b/src/cli.zig @@ -163,7 +163,7 @@ pub const Arguments = struct { clap.parseParam("--bunfile <STR> Use a .bun file (default: node_modules.bun)") catch unreachable, clap.parseParam("--server-bunfile <STR> Use a .server.bun file (default: node_modules.server.bun)") catch unreachable, clap.parseParam("--cwd <STR> Absolute path to resolve files & entry points from. This just changes the process' cwd.") catch unreachable, - clap.parseParam("-c, --config <PATH>? Config file to load bun from (e.g. -c bunfig.json") catch unreachable, + clap.parseParam("-c, --config <PATH>? Config file to load bun from (e.g. -c bunfig.toml") catch unreachable, clap.parseParam("--disable-react-fast-refresh Disable React Fast Refresh") catch unreachable, clap.parseParam("--disable-hmr Disable Hot Module Reloading (disables fast refresh too)") catch unreachable, clap.parseParam("--extension-order <STR>... 
defaults to: .tsx,.ts,.jsx,.js,.json ") catch unreachable, @@ -237,7 +237,7 @@ pub const Arguments = struct { var config_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; var config_path_ = config_path__; if (config_path_.len == 0) { - config_path_ = "bunfig.json"; + config_path_ = "bunfig.toml"; } var config_path: [:0]u8 = undefined; if (config_path_[0] == '/') { diff --git a/src/fallback.version b/src/fallback.version index 0c89f656c..6d559b7b7 100644 --- a/src/fallback.version +++ b/src/fallback.version @@ -1 +1 @@ -10830f5dbfd2585f
\ No newline at end of file +9effd1b1e91cdf3
\ No newline at end of file diff --git a/src/http.zig b/src/http.zig index abcd60bdb..1ec847aad 100644 --- a/src/http.zig +++ b/src/http.zig @@ -847,7 +847,7 @@ pub const RequestContext = struct { } switch (loader) { - .json, .ts, .tsx, .js, .jsx => { + .toml, .json, .ts, .tsx, .js, .jsx => { // Since we already have: // - The file descriptor // - The path @@ -2189,7 +2189,7 @@ pub const RequestContext = struct { if (written.empty) { switch (loader) { .css => try ctx.sendNoContent(), - .js, .jsx, .ts, .tsx, .json => { + .toml, .js, .jsx, .ts, .tsx, .json => { const buf = "export default {};"; const strong_etag = comptime std.hash.Wyhash.hash(0, buf); const etag_content_slice = std.fmt.bufPrintIntToSlice(strong_etag_buffer[0..49], strong_etag, 16, .upper, .{}); diff --git a/src/js_ast.zig b/src/js_ast.zig index a78650266..b1b2e26f4 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -17,6 +17,7 @@ const RefHashCtx = @import("ast/base.zig").RefHashCtx; const ObjectPool = @import("./pool.zig").ObjectPool; const ImportRecord = @import("import_record.zig").ImportRecord; const allocators = @import("allocators.zig"); +const JSC = @import("javascript_core"); const RefCtx = @import("./ast/base.zig").RefCtx; const _hash_map = @import("hash_map.zig"); @@ -238,6 +239,10 @@ pub fn BabyList(comptime Type: type) type { return if (this.len > 0) this.ptr[0] else @as(?*Type, null); } + pub inline fn last(this: ListType) ?*Type { + return if (this.len > 0) &this.ptr[this.len - 1] else @as(?*Type, null); + } + pub inline fn first_(this: ListType) Type { return this.ptr[0]; } @@ -255,8 +260,9 @@ pub fn BabyList(comptime Type: type) type { pub inline fn @"[0]"(this: ListType) Type { return this.ptr[0]; } + const OOM = error{OutOfMemory}; - pub fn push(this: *ListType, allocator: std.mem.Allocator, value: Type) !void { + pub fn push(this: *ListType, allocator: std.mem.Allocator, value: Type) OOM!void { var list_ = this.list(); try list_.append(allocator, value); this.update(list_); @@ 
-962,9 +968,30 @@ pub const E = struct { is_single_line: bool = false, is_parenthesized: bool = false, + pub fn push(this: *Array, allocator: std.mem.Allocator, item: Expr) !void { + try this.items.push(allocator, item); + } + pub inline fn slice(this: Array) []Expr { return this.items.slice(); } + + pub fn toJS(this: @This(), ctx: JSC.C.JSContextRef, exception: JSC.C.ExceptionRef) JSC.C.JSValueRef { + var stack = std.heap.stackFallback(32 * @sizeOf(ExprNodeList), JSC.getAllocator(ctx)); + var allocator = stack.get(); + var results = allocator.alloc(JSC.C.JSValueRef, this.items.len) catch { + return JSC.C.JSValueMakeUndefined(ctx); + }; + defer if (stack.fixed_buffer_allocator.end_index >= stack.fixed_buffer_allocator.buffer.len - 1) allocator.free(results); + + var i: usize = 0; + const items = this.items.slice(); + while (i < results.len) : (i += 1) { + results[i] = items[i].toJS(ctx, exception); + } + + return JSC.C.JSObjectMakeArray(ctx, results.len, results.ptr, exception); + } }; pub const Unary = struct { @@ -978,7 +1005,12 @@ pub const E = struct { op: Op.Code, }; - pub const Boolean = struct { value: bool }; + pub const Boolean = struct { + value: bool, + pub fn toJS(this: @This(), ctx: JSC.C.JSContextRef, _: JSC.C.ExceptionRef) JSC.C.JSValueRef { + return JSC.C.JSValueMakeBoolean(ctx, this.value); + } + }; pub const Super = struct {}; pub const Null = struct {}; pub const This = struct {}; @@ -1208,6 +1240,10 @@ pub const E = struct { pub fn jsonStringify(self: *const Number, opts: anytype, o: anytype) !void { return try std.json.stringify(self.value, opts, o); } + + pub fn toJS(this: @This(), _: JSC.C.JSContextRef, _: JSC.C.ExceptionRef) JSC.C.JSValueRef { + return JSC.JSValue.jsNumber(this.value).asObjectRef(); + } }; pub const BigInt = struct { @@ -1218,6 +1254,11 @@ pub const E = struct { pub fn jsonStringify(self: *const @This(), opts: anytype, o: anytype) !void { return try std.json.stringify(self.value, opts, o); } + + pub fn toJS(_: @This(), _: 
JSC.C.JSContextRef, _: JSC.C.ExceptionRef) JSC.C.JSValueRef { + // TODO: + return JSC.JSValue.jsNumber(0); + } }; pub const Object = struct { @@ -1226,6 +1267,226 @@ pub const E = struct { is_single_line: bool = false, is_parenthesized: bool = false, + pub const Rope = struct { + head: Expr, + next: ?*Rope = null, + const OOM = error{OutOfMemory}; + pub fn append(this: *Rope, expr: Expr, allocator: std.mem.Allocator) OOM!*Rope { + if (this.next) |next| { + return try next.append(expr, allocator); + } + + var rope = try allocator.create(Rope); + rope.* = .{ + .head = expr, + }; + this.next = rope; + return rope; + } + }; + + // pub fn toJS(this: Object, ctx: JSC.C.JSContextRef, exception: JSC.C.ExceptionRef) JSC.C.JSValueRef { + // const Creator = struct { + // object: Object, + // pub fn create(this: *@This(), obj: *JSObject, global: *JSGlobalObject) void { + // var iter = this.query.iter(); + // var str: ZigString = undefined; + // while (iter.next(&query_string_values_buf)) |entry| { + // str = ZigString.init(entry.name); + + // std.debug.assert(entry.values.len > 0); + // if (entry.values.len > 1) { + // var values = query_string_value_refs_buf[0..entry.values.len]; + // for (entry.values) |value, i| { + // values[i] = ZigString.init(value); + // } + // obj.putRecord(global, &str, values.ptr, values.len); + // } else { + // query_string_value_refs_buf[0] = ZigString.init(entry.values[0]); + + // obj.putRecord(global, &str, &query_string_value_refs_buf, 1); + // } + // } + // } + // }; + // } + + pub fn get(self: *const Object, key: string) ?Expr { + return if (asProperty(self, key)) |query| query.expr else @as(?Expr, null); + } + + pub const SetError = error{ OutOfMemory, Clobber }; + + pub fn set(self: *const Object, key: Expr, allocator: std.mem.Allocator, value: Expr) SetError!void { + if (self.hasProperty(key.data.e_string.utf8)) return error.Clobber; + try self.properties.push(allocator, .{ + .key = key, + .value = value, + }); + } + + pub const RopeQuery = 
struct { + expr: Expr, + rope: *const Rope, + }; + + // this is terribly, shamefully slow + pub fn setRope(self: *Object, rope: *const Rope, allocator: std.mem.Allocator, value: Expr) SetError!void { + if (self.get(rope.head.data.e_string.utf8)) |existing| { + switch (existing.data) { + .e_array => |array| { + if (rope.next == null) { + try array.push(allocator, value); + return; + } + + if (array.items.last()) |last| { + if (last.data != .e_object) { + return error.Clobber; + } + + try last.data.e_object.setRope(rope.next.?, allocator, value); + return; + } + + try array.push(allocator, value); + return; + }, + .e_object => |object| { + if (rope.next != null) { + try object.setRope(rope.next.?, allocator, value); + return; + } + + return error.Clobber; + }, + else => { + return error.Clobber; + }, + } + } + + var value_ = value; + if (rope.next) |next| { + var obj = Expr.init(E.Object, E.Object{ .properties = .{} }, rope.head.loc); + try obj.data.e_object.setRope(next, allocator, value); + value_ = obj; + } + + try self.properties.push(allocator, .{ + .key = rope.head, + .value = value_, + }); + } + + pub fn getOrPutObject(self: *Object, rope: *const Rope, allocator: std.mem.Allocator) SetError!Expr { + if (self.get(rope.head.data.e_string.utf8)) |existing| { + switch (existing.data) { + .e_array => |array| { + if (rope.next == null) { + return error.Clobber; + } + + if (array.items.last()) |last| { + if (last.data != .e_object) { + return error.Clobber; + } + + return try last.data.e_object.getOrPutObject(rope.next.?, allocator); + } + + return error.Clobber; + }, + .e_object => |object| { + if (rope.next == null) { + // success + return existing; + } + + return try object.getOrPutObject(rope.next.?, allocator); + }, + else => { + return error.Clobber; + }, + } + } + + if (rope.next) |next| { + var obj = Expr.init(E.Object, E.Object{ .properties = .{} }, rope.head.loc); + const out = try obj.data.e_object.getOrPutObject(next, allocator); + try 
self.properties.push(allocator, .{ + .key = rope.head, + .value = obj, + }); + return out; + } + + const out = Expr.init(E.Object, E.Object{}, rope.head.loc); + try self.properties.push(allocator, .{ + .key = rope.head, + .value = out, + }); + return out; + } + + pub fn getOrPutArray(self: *Object, rope: *const Rope, allocator: std.mem.Allocator) SetError!Expr { + if (self.get(rope.head.data.e_string.utf8)) |existing| { + switch (existing.data) { + .e_array => |array| { + if (rope.next == null) { + return existing; + } + + if (array.items.last()) |last| { + if (last.data != .e_object) { + return error.Clobber; + } + + return try last.data.e_object.getOrPutArray(rope.next.?, allocator); + } + + return error.Clobber; + }, + .e_object => |object| { + if (rope.next == null) { + return error.Clobber; + } + + return try object.getOrPutArray(rope.next.?, allocator); + }, + else => { + return error.Clobber; + }, + } + } + + if (rope.next) |next| { + var obj = Expr.init(E.Object, E.Object{ .properties = .{} }, rope.head.loc); + const out = try obj.data.e_object.getOrPutArray(next, allocator); + try self.properties.push(allocator, .{ + .key = rope.head, + .value = obj, + }); + return out; + } + + const out = Expr.init(E.Array, E.Array{}, rope.head.loc); + try self.properties.push(allocator, .{ + .key = rope.head, + .value = out, + }); + return out; + } + + pub fn hasProperty(obj: *const Object, name: string) bool { + for (obj.properties.slice()) |prop| { + const key = prop.key orelse continue; + if (std.meta.activeTag(key.data) != .e_string) continue; + if (key.eql(string, name)) return true; + } + return false; + } + pub fn asProperty(obj: *const Object, name: string) ?Expr.Query { for (obj.properties.slice()) |prop, i| { const value = prop.value orelse continue; @@ -1843,10 +2104,51 @@ pub const Expr = struct { return false; } + pub fn toJS(this: Expr, ctx: JSC.C.JSContextRef, exception: JSC.C.ExceptionRef) JSC.C.JSValueRef { + return this.data.toJS(ctx, exception); + } + 
pub fn get(expr: *const Expr, name: string) ?Expr { return if (asProperty(expr, name)) |query| query.expr else null; } + pub fn getRope(self: *const Expr, rope: *const E.Object.Rope) ?E.Object.RopeQuery { + if (self.get(rope.head.data.e_string.utf8)) |existing| { + switch (existing.data) { + .e_array => |array| { + if (rope.next) |next| { + if (array.items.last()) |end| { + return end.getRope(next); + } + } + + return E.Object.RopeQuery{ + .expr = existing, + .rope = rope, + }; + }, + .e_object => { + if (rope.next) |next| { + if (existing.getRope(next)) |end| { + return end; + } + } + + return E.Object.RopeQuery{ + .expr = existing, + .rope = rope, + }; + }, + else => return E.Object.RopeQuery{ + .expr = existing, + .rope = rope, + }, + } + } + + return null; + } + // Making this comptime bloats the binary and doesn't seem to impact runtime performance. pub fn asProperty(expr: *const Expr, name: string) ?Query { if (std.meta.activeTag(expr.data) != .e_object) return null; @@ -3005,6 +3307,22 @@ pub const Expr = struct { // If it ends up in JSParser or JSPrinter, it is a bug. 
inline_identifier: i32, + pub fn toJS(this: Data, ctx: JSC.C.JSContextRef, exception: JSC.C.ExceptionRef) JSC.C.JSValueRef { + return switch (this) { + .e_array => |e| e.toJS(ctx, exception), + .e_null => |e| e.toJS(ctx, exception), + .e_undefined => |e| e.toJS(ctx, exception), + .e_object => |e| e.toJS(ctx, exception), + .e_boolean => |e| e.toJS(ctx, exception), + .e_number => |e| e.toJS(ctx, exception), + .e_big_int => |e| e.toJS(ctx, exception), + .e_string => |e| e.toJS(ctx, exception), + else => { + return JSC.C.JSValueMakeUndefined(ctx); + }, + }; + } + pub const Store = struct { const often = 512; const medium = 256; @@ -3933,7 +4251,6 @@ pub fn printmem(comptime format: string, args: anytype) void { pub const Macro = struct { const JavaScript = @import("./javascript/jsc/javascript.zig"); - const JSC = @import("./javascript/jsc/bindings/bindings.zig"); const JSCBase = @import("./javascript/jsc/base.zig"); const Resolver = @import("./resolver/resolver.zig").Resolver; const isPackagePath = @import("./resolver/resolver.zig").isPackagePath; diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 5191d7095..45c8dd982 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -1574,6 +1574,7 @@ pub fn NewLexer(comptime json_options: JSONOptions) type { // this code is so hot that if you save lexer.raw() into a temporary variable // it shows up in profiling lexer.identifier = lexer.raw(); + // switching to strings.ExactSizeMatcher doesn't seem to have an impact here lexer.token = Keywords.get(lexer.identifier) orelse T.t_identifier; } else { const scan_result = try lexer.scanIdentifierWithEscapes(.normal); diff --git a/src/options.zig b/src/options.zig index 4702506e1..17da46b2f 100644 --- a/src/options.zig +++ b/src/options.zig @@ -621,7 +621,7 @@ pub const Platform = enum { }; }; -pub const Loader = enum(u3) { +pub const Loader = enum(u4) { jsx, js, ts, @@ -629,6 +629,7 @@ pub const Loader = enum(u3) { css, file, json, + toml, pub const Map = std.EnumArray(Loader, 
string); pub const stdin_name: Map = brk: { var map = Map.initFill(""); @@ -639,6 +640,7 @@ pub const Loader = enum(u3) { map.set(Loader.css, "input.css"); map.set(Loader.file, "input"); map.set(Loader.json, "input.json"); + map.set(Loader.toml, "input.toml"); break :brk map; }; @@ -659,7 +661,7 @@ pub const Loader = enum(u3) { if (zig_str.len == 0) return null; return fromString(zig_str.slice()) orelse { - JSC.throwInvalidArguments("invalid loader – must be js, jsx, tsx, ts, css, file, or json", .{}, global.ref(), exception); + JSC.throwInvalidArguments("invalid loader – must be js, jsx, tsx, ts, css, file, toml, or json", .{}, global.ref(), exception); return null; }; } @@ -679,6 +681,7 @@ pub const Loader = enum(u3) { LoaderMatcher.case("css") => Loader.css, LoaderMatcher.case("file") => Loader.file, LoaderMatcher.case("json") => Loader.json, + LoaderMatcher.case("toml") => Loader.toml, else => null, }; } @@ -698,6 +701,7 @@ pub const Loader = enum(u3) { .tsx => .tsx, .css => .css, .json => .json, + .toml => .toml, else => .file, }; } @@ -745,6 +749,8 @@ pub const defaultLoaders = std.ComptimeStringMap(Loader, .{ .{ ".mts", Loader.ts }, .{ ".cts", Loader.ts }, + + .{ ".toml", Loader.toml }, }); // https://webpack.js.org/guides/package-exports/#reference-syntax @@ -1019,6 +1025,7 @@ pub fn loadersFromTransformOptions(allocator: std.mem.Allocator, _loaders: ?Api. .css => Loader.css, .tsx => Loader.tsx, .json => Loader.json, + .toml => Loader.toml, else => unreachable, }; @@ -1032,13 +1039,15 @@ pub fn loadersFromTransformOptions(allocator: std.mem.Allocator, _loaders: ?Api. 
loader_values, ); const default_loader_ext = comptime [_]string{ - ".jsx", ".json", - ".js", ".mjs", - ".cjs", ".css", + ".jsx", ".json", + ".js", ".mjs", + ".cjs", ".css", // https://devblogs.microsoft.com/typescript/announcing-typescript-4-5-beta/#new-file-extensions - ".ts", ".tsx", - ".mts", ".cts", + ".ts", ".tsx", + ".mts", ".cts", + + ".toml", }; inline for (default_loader_ext) |ext| { diff --git a/src/toml/toml_lexer.zig b/src/toml/toml_lexer.zig new file mode 100644 index 000000000..35c457a86 --- /dev/null +++ b/src/toml/toml_lexer.zig @@ -0,0 +1,1211 @@ +const std = @import("std"); +const logger = @import("../logger.zig"); +const js_ast = @import("../js_ast.zig"); + +const _global = @import("../global.zig"); +const string = _global.string; +const Output = _global.Output; +const Global = _global.Global; +const Environment = _global.Environment; +const strings = _global.strings; +const CodePoint = _global.CodePoint; +const MutableString = _global.MutableString; +const stringZ = _global.stringZ; +const default_allocator = _global.default_allocator; + +pub const T = enum { + t_end_of_file, + + t_open_paren, + t_close_paren, + t_open_bracket, + t_open_bracket_double, + + t_close_bracket, + t_close_bracket_double, + + t_open_brace, + t_close_brace, + + t_numeric_literal, + + t_comma, + + t_string_literal, + t_dot, + + t_equal, + + t_true, + t_false, + + t_colon, + + t_identifier, + + t_plus, + t_minus, +}; + +pub const Lexer = struct { + source: logger.Source, + log: *logger.Log, + start: usize = 0, + end: usize = 0, + current: usize = 0, + + allocator: std.mem.Allocator, + + code_point: CodePoint = -1, + identifier: []const u8 = "", + number: f64 = 0.0, + prev_error_loc: logger.Loc = logger.Loc.Empty, + string_literal_slice: string = "", + line_number: u32 = 0, + token: T = T.t_end_of_file, + allow_double_bracket: bool = true, + + has_newline_before: bool = false, + + pub inline fn loc(self: *const Lexer) logger.Loc { + return logger.usize2Loc(self.start); + } 
+ + pub fn syntaxError(self: *Lexer) !void { + @setCold(true); + + self.addError(self.start, "Syntax Error!!", .{}, true); + return Error.SyntaxError; + } + + pub fn addError(self: *Lexer, _loc: usize, comptime format: []const u8, args: anytype, _: bool) void { + @setCold(true); + + var __loc = logger.usize2Loc(_loc); + if (__loc.eql(self.prev_error_loc)) { + return; + } + + self.log.addErrorFmt(&self.source, __loc, self.log.msgs.allocator, format, args) catch unreachable; + self.prev_error_loc = __loc; + } + + pub fn addDefaultError(self: *Lexer, msg: []const u8) !void { + @setCold(true); + + self.addError(self.start, "{s}", .{msg}, true); + return Error.SyntaxError; + } + + pub fn addSyntaxError(self: *Lexer, _loc: usize, comptime fmt: []const u8, args: anytype) !void { + @setCold(true); + self.addError(_loc, fmt, args, false); + return Error.SyntaxError; + } + + pub fn addRangeError(self: *Lexer, r: logger.Range, comptime format: []const u8, args: anytype, _: bool) !void { + @setCold(true); + + if (self.prev_error_loc.eql(r.loc)) { + return; + } + + const errorMessage = std.fmt.allocPrint(self.log.msgs.allocator, format, args) catch unreachable; + try self.log.addRangeError(&self.source, r, errorMessage); + self.prev_error_loc = r.loc; + + // if (panic) { + // return Error.ParserError; + // } + } + + /// Look ahead at the next n codepoints without advancing the iterator. + /// If fewer than n codepoints are available, then return the remainder of the string. 
+ fn peek(it: *Lexer, n: usize) string { + const original_i = it.current; + defer it.current = original_i; + + var end_ix = original_i; + var found: usize = 0; + while (found < n) : (found += 1) { + const next_codepoint = it.nextCodepointSlice(); + if (next_codepoint.len == 0) break; + end_ix += next_codepoint.len; + } + + return it.source.contents[original_i..end_ix]; + } + + inline fn nextCodepointSlice(it: *Lexer) []const u8 { + const cp_len = strings.wtf8ByteSequenceLength(it.source.contents.ptr[it.current]); + return if (!(cp_len + it.current > it.source.contents.len)) it.source.contents[it.current .. cp_len + it.current] else ""; + } + + inline fn nextCodepoint(it: *Lexer) CodePoint { + const cp_len = strings.wtf8ByteSequenceLength(it.source.contents.ptr[it.current]); + const slice = if (!(cp_len + it.current > it.source.contents.len)) it.source.contents[it.current .. cp_len + it.current] else ""; + + const code_point = switch (slice.len) { + 0 => -1, + 1 => @as(CodePoint, slice[0]), + else => strings.decodeWTF8RuneTMultibyte(slice.ptr[0..4], @intCast(u3, slice.len), CodePoint, strings.unicode_replacement), + }; + + it.end = it.current; + + it.current += if (code_point != strings.unicode_replacement) + cp_len + else + 1; + + return code_point; + } + + inline fn step(lexer: *Lexer) void { + lexer.code_point = lexer.nextCodepoint(); + + lexer.line_number += @as(u32, @boolToInt(lexer.code_point == '\n')); + } + + pub const Error = error{ + UTF8Fail, + OutOfMemory, + SyntaxError, + UnexpectedSyntax, + JSONStringsMustUseDoubleQuotes, + ParserError, + }; + + fn parseNumericLiteralOrDot(lexer: *Lexer) !void { + // Number or dot; + var first = lexer.code_point; + const start = lexer.current; + lexer.step(); + + // Dot without a digit after it; + if (first == '.' and (lexer.code_point < '0' or lexer.code_point > '9')) { + + // "." 
+ lexer.token = T.t_dot; + return; + } + + var underscoreCount: usize = 0; + var lastUnderscoreEnd: usize = 0; + var hasDotOrExponent = first == '.'; + var base: f32 = 0.0; + + var is_legacy_octal_literal = false; + + // Assume this is a number, but potentially change to a date/time later; + lexer.token = T.t_numeric_literal; + + // Check for binary, octal, or hexadecimal literal; + if (first == '0') { + switch (lexer.code_point) { + 'b', 'B' => { + base = 2; + }, + + 'o', 'O' => { + base = 8; + }, + + 'x', 'X' => { + base = 16; + }, + + '0'...'7', '_' => { + base = 8; + is_legacy_octal_literal = true; + }, + else => {}, + } + } + + if (base != 0) { + // Integer literal; + var isFirst = true; + var isInvalidLegacyOctalLiteral = false; + lexer.number = 0; + if (!is_legacy_octal_literal) { + lexer.step(); + } + + integerLiteral: while (true) { + switch (lexer.code_point) { + '_' => { + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + try lexer.syntaxError(); + } + + // The first digit must exist; + if (isFirst or is_legacy_octal_literal) { + try lexer.syntaxError(); + } + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; + }, + + '0', '1' => { + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); + }, + + '2', '3', '4', '5', '6', '7' => { + if (base == 2) { + try lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); + }, + '8', '9' => { + if (is_legacy_octal_literal) { + isInvalidLegacyOctalLiteral = true; + } else if (base < 10) { + try lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point - '0'); + }, + 'A', 'B', 'C', 'D', 'E', 'F' => { + if (base != 16) { + try lexer.syntaxError(); + } + lexer.number = lexer.number * base + float64(lexer.code_point + 10 - 'A'); + }, + + 'a', 'b', 'c', 'd', 'e', 'f' => { + if (base != 16) { + try lexer.syntaxError(); + } + lexer.number = lexer.number * base + 
float64(lexer.code_point + 10 - 'a'); + }, + else => { + // The first digit must exist; + if (isFirst) { + try lexer.syntaxError(); + } + + break :integerLiteral; + }, + } + + lexer.step(); + isFirst = false; + } + + var isBigIntegerLiteral = lexer.code_point == 'n' and !hasDotOrExponent; + + // Slow path: do we need to re-scan the input as text? + if (isBigIntegerLiteral or isInvalidLegacyOctalLiteral) { + var text = lexer.raw(); + + // Can't use a leading zero for bigint literals; + if (isBigIntegerLiteral and is_legacy_octal_literal) { + try lexer.syntaxError(); + } + + // Filter out underscores; + if (underscoreCount > 0) { + var bytes = lexer.allocator.alloc(u8, text.len - underscoreCount) catch unreachable; + var i: usize = 0; + for (text) |char| { + if (char != '_') { + bytes[i] = char; + i += 1; + } + } + } + + // Store bigints as text to avoid precision loss; + if (isBigIntegerLiteral) { + lexer.identifier = text; + } else if (isInvalidLegacyOctalLiteral) { + if (std.fmt.parseFloat(f64, text)) |num| { + lexer.number = num; + } else |_| { + try lexer.addSyntaxError(lexer.start, "Invalid number {s}", .{text}); + } + } + } + } else { + // Floating-point literal; + var isInvalidLegacyOctalLiteral = first == '0' and (lexer.code_point == '8' or lexer.code_point == '9'); + + // Initial digits; + while (true) { + if (lexer.code_point < '0' or lexer.code_point > '9') { + switch (lexer.code_point) { + // '-' => { + // if (lexer.raw().len == 5) { + // // Is this possibly a datetime literal that begins with a 4 digit year? 
+ // lexer.step(); + // while (!lexer.has_newline_before) { + // switch (lexer.code_point) { + // ',' => { + // lexer.string_literal_slice = lexer.raw(); + // lexer.token = T.t_string_literal; + // break; + // }, + // } + // } + // } + // }, + '_' => {}, + else => break, + } + if (lexer.code_point != '_') { + break; + } + + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + try lexer.syntaxError(); + } + + // The specification forbids underscores in this case; + if (isInvalidLegacyOctalLiteral) { + try lexer.syntaxError(); + } + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; + } + lexer.step(); + } + + // Fractional digits; + if (first != '.' and lexer.code_point == '.') { + // An underscore must not come last; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.end -= 1; + try lexer.syntaxError(); + } + + hasDotOrExponent = true; + lexer.step(); + if (lexer.code_point == '_') { + try lexer.syntaxError(); + } + while (true) { + if (lexer.code_point < '0' or lexer.code_point > '9') { + if (lexer.code_point != '_') { + break; + } + + // Cannot have multiple underscores in a row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + try lexer.syntaxError(); + } + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; + } + lexer.step(); + } + } + + // Exponent; + if (lexer.code_point == 'e' or lexer.code_point == 'E') { + // An underscore must not come last; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + lexer.end -= 1; + try lexer.syntaxError(); + } + + hasDotOrExponent = true; + lexer.step(); + if (lexer.code_point == '+' or lexer.code_point == '-') { + lexer.step(); + } + if (lexer.code_point < '0' or lexer.code_point > '9') { + try lexer.syntaxError(); + } + while (true) { + if (lexer.code_point < '0' or lexer.code_point > '9') { + if (lexer.code_point != '_') { + break; + } + + // Cannot have multiple underscores in a 
row; + if (lastUnderscoreEnd > 0 and lexer.end == lastUnderscoreEnd + 1) { + try lexer.syntaxError(); + } + + lastUnderscoreEnd = lexer.end; + underscoreCount += 1; + } + lexer.step(); + } + } + + // Take a slice of the text to parse; + var text = lexer.raw(); + + // Filter out underscores; + if (underscoreCount > 0) { + var i: usize = 0; + if (lexer.allocator.alloc(u8, text.len - underscoreCount)) |bytes| { + for (text) |char| { + if (char != '_') { + bytes[i] = char; + i += 1; + } + } + text = bytes; + } else |_| { + try lexer.addSyntaxError(lexer.start, "Out of Memory Wah Wah Wah", .{}); + return; + } + } + + if (!hasDotOrExponent and lexer.end - lexer.start < 10) { + // Parse a 32-bit integer (very fast path); + var number: u32 = 0; + for (text) |c| { + number = number * 10 + @intCast(u32, c - '0'); + } + lexer.number = @intToFloat(f64, number); + } else { + // Parse a double-precision floating-point number; + if (std.fmt.parseFloat(f64, text)) |num| { + lexer.number = num; + } else |_| { + try lexer.addSyntaxError(lexer.start, "Invalid number", .{}); + } + } + } + + // if it's a space, it might be a date timestamp + if (isIdentifierPart(lexer.code_point) or lexer.code_point == ' ') {} + } + + pub inline fn expect(self: *Lexer, comptime token: T) !void { + if (self.token != token) { + try self.expected(token); + } + + try self.next(); + } + + pub inline fn expectAssignment(self: *Lexer) !void { + switch (self.token) { + .t_equal, .t_colon => {}, + else => { + try self.expected(T.t_equal); + }, + } + + try self.next(); + } + + pub fn next(lexer: *Lexer) !void { + lexer.has_newline_before = lexer.end == 0; + + while (true) { + lexer.start = lexer.end; + lexer.token = T.t_end_of_file; + + switch (lexer.code_point) { + -1 => { + lexer.token = T.t_end_of_file; + }, + + '\r', '\n', 0x2028, 0x2029 => { + lexer.step(); + lexer.has_newline_before = true; + continue; + }, + + '\t', ' ' => { + lexer.step(); + continue; + }, + + '[' => { + lexer.step(); + lexer.token = 
T.t_open_bracket; + if (lexer.code_point == '[' and lexer.allow_double_bracket) { + lexer.step(); + lexer.token = T.t_open_bracket_double; + } + }, + ']' => { + lexer.step(); + lexer.token = T.t_close_bracket; + + if (lexer.code_point == ']' and lexer.allow_double_bracket) { + lexer.step(); + lexer.token = T.t_close_bracket_double; + } + }, + '+' => { + lexer.step(); + lexer.token = T.t_plus; + }, + '-' => { + lexer.step(); + lexer.token = T.t_minus; + }, + + '{' => { + lexer.step(); + lexer.token = T.t_open_brace; + }, + '}' => { + lexer.step(); + lexer.token = T.t_close_brace; + }, + + '=' => { + lexer.step(); + lexer.token = T.t_equal; + }, + ':' => { + lexer.step(); + lexer.token = T.t_colon; + }, + ',' => { + lexer.step(); + lexer.token = T.t_comma; + }, + ';' => { + if (lexer.has_newline_before) { + lexer.step(); + + singleLineComment: while (true) { + lexer.step(); + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + break :singleLineComment; + }, + -1 => { + break :singleLineComment; + }, + else => {}, + } + } + continue; + } + + try lexer.addDefaultError("Unexpected semicolon"); + }, + '#' => { + lexer.step(); + + singleLineComment: while (true) { + lexer.step(); + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + break :singleLineComment; + }, + -1 => { + break :singleLineComment; + }, + else => {}, + } + } + continue; + }, + + // unescaped string + '\'' => { + lexer.step(); + const start = lexer.end; + var is_multiline_string_literal = false; + + if (lexer.code_point == '\'') { + lexer.step(); + // it's a multiline string literal + if (lexer.code_point == '\'') { + lexer.step(); + is_multiline_string_literal = true; + } else { + // it's an empty string + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start..start]; + return; + } + } + + if (is_multiline_string_literal) { + while (true) { + switch (lexer.code_point) { + -1 => { + try lexer.addDefaultError("Unterminated string literal"); + 
}, + '\'' => { + const end = lexer.end; + lexer.step(); + if (lexer.code_point != '\'') continue; + lexer.step(); + if (lexer.code_point != '\'') continue; + lexer.step(); + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start + 2 .. end]; + return; + }, + else => {}, + } + lexer.step(); + } + } else { + while (true) { + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + try lexer.addDefaultError("Unterminated string literal (single-line)"); + }, + -1 => { + try lexer.addDefaultError("Unterminated string literal"); + }, + '\'' => { + lexer.step(); + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start .. lexer.end - 1]; + return; + }, + else => {}, + } + lexer.step(); + } + } + }, + '"' => { + lexer.step(); + var needs_slow_pass = false; + const start = lexer.end; + var is_multiline_string_literal = false; + + if (lexer.code_point == '"') { + lexer.step(); + // it's a multiline basic string + if (lexer.code_point == '"') { + lexer.step(); + is_multiline_string_literal = true; + } else { + // it's an empty string + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start..start]; + return; + } + } + + if (is_multiline_string_literal) { + while (true) { + switch (lexer.code_point) { + -1 => { + try lexer.addDefaultError("Unterminated basic string"); + }, + '\\' => { + lexer.step(); + needs_slow_pass = true; + if (lexer.code_point == '"') { + lexer.step(); + continue; + } + }, + '"' => { + const end = lexer.end; + lexer.step(); + if (lexer.code_point != '"') continue; + lexer.step(); + if (lexer.code_point != '"') continue; + lexer.step(); + + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start + 2 .. 
end]; + if (needs_slow_pass) break; + return; + }, + else => {}, + } + lexer.step(); + } + } else { + while (true) { + switch (lexer.code_point) { + '\r', '\n', 0x2028, 0x2029 => { + try lexer.addDefaultError("Unterminated basic string (single-line)"); + }, + -1 => { + try lexer.addDefaultError("Unterminated basic string"); + }, + '\\' => { + lexer.step(); + needs_slow_pass = true; + if (lexer.code_point == '"') { + lexer.step(); + continue; + } + }, + '"' => { + lexer.step(); + + lexer.token = T.t_string_literal; + lexer.string_literal_slice = lexer.source.contents[start .. lexer.end - 1]; + if (needs_slow_pass) break; + return; + }, + else => {}, + } + lexer.step(); + } + } + + lexer.start = start; + if (needs_slow_pass) { + const text = lexer.string_literal_slice; + var array_list = try std.ArrayList(u8).initCapacity(lexer.allocator, text.len); + if (is_multiline_string_literal) { + try lexer.decodeEscapeSequences(start, text, true, @TypeOf(array_list), &array_list); + } else { + try lexer.decodeEscapeSequences(start, text, false, @TypeOf(array_list), &array_list); + } + lexer.string_literal_slice = array_list.toOwnedSlice(); + } + + lexer.token = T.t_string_literal; + }, + + '.', '0'...'9' => { + try lexer.parseNumericLiteralOrDot(); + }, + + 'a'...'z', 'A'...'Z', '$', '_' => { + lexer.step(); + while (isIdentifierPart(lexer.code_point)) { + lexer.step(); + } + lexer.identifier = lexer.raw(); + lexer.token = switch (lexer.identifier.len) { + 4 => if (strings.eqlComptimeIgnoreLen(lexer.identifier, "true")) T.t_true else T.t_identifier, + 5 => if (strings.eqlComptimeIgnoreLen(lexer.identifier, "false")) T.t_false else T.t_identifier, + else => T.t_identifier, + }; + }, + + else => try lexer.unexpected(), + } + return; + } + } + + fn decodeEscapeSequences(lexer: *Lexer, start: usize, text: string, comptime allow_multiline: bool, comptime BufType: type, buf_: *BufType) !void { + var buf = buf_.*; + defer buf_.* = buf; + + const iterator = strings.CodepointIterator{ 
.bytes = text[start..], .i = 0 }; + var iter = strings.CodepointIterator.Cursor{}; + while (iterator.next(&iter)) { + const width = iter.width; + switch (iter.c) { + '\r' => { + + // Convert '\r\n' into '\n' + if (iter.i < text.len and text[iter.i] == '\n') { + iter.i += 1; + } + + // Convert '\r' into '\n' + buf.append('\n') catch unreachable; + continue; + }, + + '\\' => { + _ = iterator.next(&iter) or return; + + const c2 = iter.c; + + const width2 = iter.width; + switch (c2) { + // https://mathiasbynens.be/notes/javascript-escapes#single + 'b' => { + buf.append(8) catch unreachable; + continue; + }, + 'f' => { + buf.append(9) catch unreachable; + continue; + }, + 'n' => { + buf.append(10) catch unreachable; + continue; + }, + 'v' => { + // Vertical tab is invalid JSON + // We're going to allow it. + // if (comptime is_json) { + // lexer.end = start + iter.i - width2; + // try lexer.syntaxError(); + // } + buf.append(11) catch unreachable; + continue; + }, + 't' => { + buf.append(12) catch unreachable; + continue; + }, + 'r' => { + buf.append(13) catch unreachable; + continue; + }, + + // legacy octal literals + '0'...'7' => { + const octal_start = (iter.i + width2) - 2; + + // 1-3 digit octal + var is_bad = false; + var value: i64 = c2 - '0'; + var restore = iter; + + _ = iterator.next(&iter) or { + if (value == 0) { + try buf.append(0); + return; + } + + try lexer.syntaxError(); + return; + }; + + const c3: CodePoint = iter.c; + + switch (c3) { + '0'...'7' => { + value = value * 8 + c3 - '0'; + restore = iter; + _ = iterator.next(&iter) or return lexer.syntaxError(); + + const c4 = iter.c; + switch (c4) { + '0'...'7' => { + const temp = value * 8 + c4 - '0'; + if (temp < 256) { + value = temp; + } else { + iter = restore; + } + }, + '8', '9' => { + is_bad = true; + }, + else => { + iter = restore; + }, + } + }, + '8', '9' => { + is_bad = true; + }, + else => { + iter = restore; + }, + } + + iter.c = @intCast(i32, value); + if (is_bad) { + lexer.addRangeError( 
+ logger.Range{ .loc = .{ .start = @intCast(i32, octal_start) }, .len = @intCast(i32, iter.i - octal_start) }, + "Invalid legacy octal literal", + .{}, + false, + ) catch unreachable; + } + }, + '8', '9' => { + iter.c = c2; + }, + // 2-digit hexadecimal + 'x' => { + if (comptime allow_multiline) { + lexer.end = start + iter.i - width2; + try lexer.syntaxError(); + } + + var value: CodePoint = 0; + var c3: CodePoint = 0; + var width3: u3 = 0; + + _ = iterator.next(&iter) or return lexer.syntaxError(); + c3 = iter.c; + width3 = iter.width; + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + _ = iterator.next(&iter) or return lexer.syntaxError(); + c3 = iter.c; + width3 = iter.width; + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + iter.c = value; + }, + 'u' => { + // We're going to make this an i64 so we don't risk integer overflows + // when people do weird things + var value: i64 = 0; + + _ = iterator.next(&iter) or return lexer.syntaxError(); + var c3 = iter.c; + var width3 = iter.width; + + // variable-length + if (c3 == '{') { + const hex_start = iter.i - width - width2 - width3; + var is_first = true; + var is_out_of_range = false; + variableLength: while (true) { + _ = iterator.next(&iter) or break :variableLength; + c3 = iter.c; + + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + '}' => { + if (is_first) { + lexer.end = start + iter.i - 
width3; + return lexer.syntaxError(); + } + break :variableLength; + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + // '\U0010FFFF + // copied from golang utf8.MaxRune + if (value > 1114111) { + is_out_of_range = true; + } + is_first = false; + } + + if (is_out_of_range) { + try lexer.addRangeError( + .{ .loc = .{ .start = @intCast(i32, start + hex_start) }, .len = @intCast(i32, (iter.i - hex_start)) }, + "Unicode escape sequence is out of range", + .{}, + true, + ); + return; + } + + // fixed-length + } else { + // Fixed-length + // comptime var j: usize = 0; + var j: usize = 0; + while (j < 4) : (j += 1) { + switch (c3) { + '0'...'9' => { + value = value * 16 | (c3 - '0'); + }, + 'a'...'f' => { + value = value * 16 | (c3 + 10 - 'a'); + }, + 'A'...'F' => { + value = value * 16 | (c3 + 10 - 'A'); + }, + else => { + lexer.end = start + iter.i - width3; + return lexer.syntaxError(); + }, + } + + if (j < 3) { + _ = iterator.next(&iter) or return lexer.syntaxError(); + c3 = iter.c; + + width3 = iter.width; + } + } + } + + iter.c = @truncate(CodePoint, value); + }, + '\r' => { + if (comptime !allow_multiline) { + lexer.end = start + iter.i - width2; + try lexer.addDefaultError("Unexpected end of line"); + } + + // Ignore line continuations. A line continuation is not an escaped newline. + if (iter.i < text.len and text[iter.i + 1] == '\n') { + // Make sure Windows CRLF counts as a single newline + iter.i += 1; + } + continue; + }, + '\n', 0x2028, 0x2029 => { + // Ignore line continuations. A line continuation is not an escaped newline. 
+ if (comptime !allow_multiline) { + lexer.end = start + iter.i - width2; + try lexer.addDefaultError("Unexpected end of line"); + } + continue; + }, + else => { + iter.c = c2; + }, + } + }, + else => {}, + } + + switch (iter.c) { + -1 => return try lexer.addDefaultError("Unexpected end of file"), + 0...127 => { + buf.append(@intCast(u8, iter.c)) catch unreachable; + }, + else => { + var part: [4]u8 = undefined; + const len = strings.encodeWTF8Rune(&part, iter.c); + try buf.appendSlice(part[0..len]); + }, + } + } + } + + pub fn expected(self: *Lexer, token: T) !void { + try self.expectedString(std.mem.span(@tagName(token))); + } + + pub fn unexpected(lexer: *Lexer) !void { + const found = finder: { + lexer.start = std.math.min(lexer.start, lexer.end); + + if (lexer.start == lexer.source.contents.len) { + break :finder "end of file"; + } else { + break :finder lexer.raw(); + } + }; + + try lexer.addRangeError(lexer.range(), "Unexpected {s}", .{found}, true); + } + + pub fn expectedString(self: *Lexer, text: string) !void { + const found = finder: { + if (self.source.contents.len != self.start) { + break :finder self.raw(); + } else { + break :finder "end of file"; + } + }; + + try self.addRangeError(self.range(), "Expected {s} but found {s}", .{ text, found }, true); + } + + pub fn range(self: *Lexer) logger.Range { + return logger.Range{ + .loc = logger.usize2Loc(self.start), + .len = std.math.lossyCast(i32, self.end - self.start), + }; + } + + pub fn init(log: *logger.Log, source: logger.Source, allocator: std.mem.Allocator) !Lexer { + var lex = Lexer{ + .log = log, + .source = source, + .prev_error_loc = logger.Loc.Empty, + .allocator = allocator, + }; + lex.step(); + try lex.next(); + + return lex; + } + + pub inline fn toEString(lexer: *Lexer) js_ast.E.String { + return js_ast.E.String{ .utf8 = lexer.string_literal_slice }; + } + + pub fn raw(self: *Lexer) []const u8 { + return self.source.contents[self.start..self.end]; + } +}; + +pub fn 
isIdentifierPart(code_point: CodePoint) bool { + return switch (code_point) { + '0'...'9', + 'a'...'z', + 'A'...'Z', + '$', + '_', + '-', + => true, + else => false, + }; +} + +pub fn isLatin1Identifier(comptime Buffer: type, name: Buffer) bool { + if (name.len == 0) return false; + + switch (name[0]) { + 'a'...'z', + 'A'...'Z', + '$', + '1'...'9', + '_', + '-', + => {}, + else => return false, + } + + if (name.len > 0) { + for (name[1..]) |c| { + switch (c) { + '0'...'9', + 'a'...'z', + 'A'...'Z', + '$', + '_', + '-', + => {}, + else => return false, + } + } + } + + return true; +} + +inline fn float64(num: anytype) f64 { + return @intToFloat(f64, num); +} diff --git a/src/toml/toml_parser.zig b/src/toml/toml_parser.zig new file mode 100644 index 000000000..9fb01882f --- /dev/null +++ b/src/toml/toml_parser.zig @@ -0,0 +1,396 @@ +const std = @import("std"); +const logger = @import("../logger.zig"); +const toml_lexer = @import("./toml_lexer.zig"); +const Lexer = toml_lexer.Lexer; +const importRecord = @import("../import_record.zig"); +const js_ast = @import("../js_ast.zig"); +const options = @import("../options.zig"); + +const fs = @import("../fs.zig"); +const _global = @import("../global.zig"); +const string = _global.string; +const Output = _global.Output; +const Global = _global.Global; +const Environment = _global.Environment; +const strings = _global.strings; +const MutableString = _global.MutableString; +const stringZ = _global.stringZ; +const default_allocator = _global.default_allocator; +const C = _global.C; +const expect = std.testing.expect; +const ImportKind = importRecord.ImportKind; +const BindingNodeIndex = js_ast.BindingNodeIndex; + +const StmtNodeIndex = js_ast.StmtNodeIndex; +const ExprNodeIndex = js_ast.ExprNodeIndex; +const ExprNodeList = js_ast.ExprNodeList; +const StmtNodeList = js_ast.StmtNodeList; +const BindingNodeList = js_ast.BindingNodeList; +const assert = std.debug.assert; + +const LocRef = js_ast.LocRef; +const S = js_ast.S; +const B 
= js_ast.B;
const G = js_ast.G;
const T = toml_lexer.T;
const E = js_ast.E;
const Stmt = js_ast.Stmt;
const Expr = js_ast.Expr;
const Binding = js_ast.Binding;
const Symbol = js_ast.Symbol;
const Level = js_ast.Op.Level;
const Op = js_ast.Op;
const Scope = js_ast.Scope;
const locModuleScope = logger.Loc.Empty;

const LEXER_DEBUGGER_WORKAROUND = false;
const IdentityContext = @import("../identity_context.zig").IdentityContext;

/// Thread-local free list of reusable `u64` hash sets.
///
/// `get` pops a previously released node (clearing its contents but keeping its
/// capacity) or creates a new one with `default_allocator`; `release` pushes a
/// node back onto the calling thread's list.
const HashMapPool = struct {
    const HashMap = std.HashMap(u64, void, IdentityContext, 80);
    const LinkedList = std.SinglyLinkedList(HashMap);
    threadlocal var list: LinkedList = undefined;
    // `list` is only valid once `loaded` is true (set by the first `release`).
    threadlocal var loaded: bool = false;

    /// Returns a cleared map node. The allocator argument is ignored; new nodes
    /// are always created with `default_allocator`.
    pub fn get(_: std.mem.Allocator) *LinkedList.Node {
        if (loaded) {
            if (list.popFirst()) |node| {
                node.data.clearRetainingCapacity();
                return node;
            }
        }

        var new_node = default_allocator.create(LinkedList.Node) catch unreachable;
        new_node.* = LinkedList.Node{ .data = HashMap.initContext(default_allocator, IdentityContext{}) };
        return new_node;
    }

    /// Hands `node` back to this thread's pool for reuse by a later `get`.
    pub fn release(node: *LinkedList.Node) void {
        if (loaded) {
            list.prepend(node);
            return;
        }

        list = LinkedList{ .first = node };
        loaded = true;
    }
};

/// TOML parser that builds a `js_ast` expression tree (`E.Object`, `E.Array`,
/// `E.String`, `E.Number`, `E.Boolean`) from a source file.
pub const TOML = struct {
    lexer: Lexer,
    log: *logger.Log,
    allocator: std.mem.Allocator,

    /// Creates a parser over `source_`. The lexer is initialised here, which
    /// already scans the first token.
    pub fn init(allocator: std.mem.Allocator, source_: logger.Source, log: *logger.Log) !TOML {
        return TOML{
            .lexer = try Lexer.init(log, source_, allocator),
            .allocator = allocator,
            .log = log,
        };
    }

    /// The source being parsed (owned by the lexer).
    pub inline fn source(p: *const TOML) *const logger.Source {
        return &p.lexer.source;
    }

    /// Wraps an AST payload in an `Expr` at `loc`. Accepts either a value or a
    /// pointer to one; a pointer is dereferenced first.
    pub fn e(_: *TOML, t: anytype, loc: logger.Loc) Expr {
        const Type = @TypeOf(t);
        if (@typeInfo(Type) == .Pointer) {
            return Expr.init(std.meta.Child(Type), t.*, loc);
        } else {
            return Expr.init(Type, t, loc);
        }
    }

    const Rope = js_ast.E.Object.Rope;

    /// Parses one segment of a (possibly dotted) key and returns it as a string
    /// expression, or `null` when the current token cannot be a key segment.
    /// Strings, bare identifiers, `true`/`false` and numeric literals are all
    /// accepted and converted to their textual form.
    pub fn parseKeySegment(p: *TOML) anyerror!?Expr {
        const loc = p.lexer.loc();

        switch (p.lexer.token) {
            .t_string_literal => {
                const str = p.lexer.toEString();
                try p.lexer.next();
                return p.e(str, loc);
            },
            .t_identifier => {
                const str = E.String{ .utf8 = p.lexer.identifier };
                try p.lexer.next();
                return p.e(str, loc);
            },
            .t_false => {
                try p.lexer.next();
                return p.e(
                    E.String{
                        .utf8 = "false",
                    },
                    loc,
                );
            },
            .t_true => {
                try p.lexer.next();
                return p.e(
                    E.String{
                        .utf8 = "true",
                    },
                    loc,
                );
            },
            // what we see as a number here could actually be a string
            .t_numeric_literal => {
                const literal = p.lexer.raw();
                try p.lexer.next();
                return p.e(E.String{ .utf8 = literal }, loc);
            },

            else => return null,
        }
    }

    /// Parses a dotted key (`a.b.c`) into a linked `Rope` allocated from
    /// `allocator` and returns its head. Every segment, including the one after
    /// each dot, is required; a missing segment is reported as "Expected key".
    pub fn parseKey(p: *TOML, allocator: std.mem.Allocator) anyerror!*Rope {
        var rope = try allocator.create(Rope);
        var head = rope;
        rope.* = .{
            .head = (try p.parseKeySegment()) orelse {
                try p.lexer.expectedString("key");
                return error.SyntaxError;
            },
            .next = null,
        };
        while (p.lexer.token == .t_dot) {
            try p.lexer.next();

            // A dot must be followed by another segment ("a. = 1" is not a key).
            const segment = (try p.parseKeySegment()) orelse {
                try p.lexer.expectedString("key");
                return error.SyntaxError;
            };
            rope = try rope.append(segment, allocator);
        }

        return head;
    }

    /// Parses `source_` and returns the root object expression. An empty source
    /// yields an empty object without running the lexer.
    pub fn parse(source_: *const logger.Source, log: *logger.Log, allocator: std.mem.Allocator) !Expr {
        switch (source_.contents.len) {
            // This is to be consistent with how disabled JS files are handled
            0 => {
                return Expr{ .loc = logger.Loc{ .start = 0 }, .data = Expr.init(E.Object, E.Object{}, logger.Loc.Empty).data };
            },
            else => {},
        }

        var parser = try TOML.init(allocator, source_.*, log);

        return try parser.runParser();
    }

    /// Top-level loop: handles `[table]` headers, `[[table array]]` headers and
    /// `key = value` assignments until end of file, and returns the root object.
    /// Assignments are stored into the most recently opened table (`head`).
    fn runParser(p: *TOML) anyerror!Expr {
        var root = p.e(E.Object{}, p.lexer.loc());
        var head = root.data.e_object;

        // Key ropes are short-lived: they are allocated from a small stack buffer
        // that is reset after every statement.
        var stack = std.heap.stackFallback(@sizeOf(Rope) * 6, p.allocator);
        var key_allocator = stack.get();

        while (true) {
            const loc = p.lexer.loc();
            switch (p.lexer.token) {
                .t_end_of_file => {
                    return root;
                },
                // child table
                .t_open_bracket => {
                    const open_start = p.lexer.start;
                    try p.lexer.next();

                    // The token after an assignment's value is scanned while
                    // `allow_double_bracket` is still off, so a `[[` that follows an
                    // assignment arrives as two adjacent `[` tokens. Treat exactly
                    // that shape as a table-array header.
                    if (p.lexer.token == .t_open_bracket and p.lexer.start == open_start + 1) {
                        try p.lexer.next();
                        head = try p.parseTableArrayHeader(root.data.e_object, loc, key_allocator);
                    } else {
                        head = try p.parseTableHeader(root.data.e_object, key_allocator);
                    }
                    stack.fixed_buffer_allocator.reset();
                },
                // child table array
                .t_open_bracket_double => {
                    try p.lexer.next();
                    head = try p.parseTableArrayHeader(root.data.e_object, loc, key_allocator);
                    stack.fixed_buffer_allocator.reset();
                },
                else => {
                    try p.parseAssignment(head, key_allocator);
                    stack.fixed_buffer_allocator.reset();
                },
            }
        }
    }

    /// A table header must be the last thing on its line; end of file also counts.
    fn expectHeaderEnd(p: *TOML) anyerror!void {
        if (!p.lexer.has_newline_before and p.lexer.token != .t_end_of_file) {
            try p.lexer.expectedString("line break");
        }
    }

    /// Parses the remainder of a `[key]` header (the opening bracket is already
    /// consumed) and returns the table that subsequent assignments go into.
    fn parseTableHeader(p: *TOML, root: *E.Object, key_allocator: std.mem.Allocator) anyerror!*E.Object {
        var key = try p.parseKey(key_allocator);

        try p.lexer.expect(.t_close_bracket);
        try p.expectHeaderEnd();

        var parent_object = root.getOrPutObject(key, p.allocator) catch |err| {
            switch (err) {
                error.Clobber => {
                    try p.lexer.addDefaultError("Table already defined");
                    return error.SyntaxError;
                },
                else => return err,
            }
        };
        return parent_object.data.e_object;
    }

    /// Parses the remainder of a `[[key]]` header (the opening brackets are
    /// already consumed), appends a fresh table to the array at `key` and
    /// returns that table.
    fn parseTableArrayHeader(p: *TOML, root: *E.Object, loc: logger.Loc, key_allocator: std.mem.Allocator) anyerror!*E.Object {
        // The closing `]]` is scanned while the key is parsed; make sure the lexer
        // is allowed to fuse it into a single token.
        p.lexer.allow_double_bracket = true;
        var key = try p.parseKey(key_allocator);

        // The lexer emits `t_close_bracket_double` for `]]`.
        try p.lexer.expect(.t_close_bracket_double);
        try p.expectHeaderEnd();

        var array = root.getOrPutArray(key, p.allocator) catch |err| {
            switch (err) {
                error.Clobber => {
                    try p.lexer.addDefaultError("Cannot overwrite table array");
                    return error.SyntaxError;
                },
                else => return err,
            }
        };
        var new_head = p.e(E.Object{}, loc);
        try array.data.e_array.push(p.allocator, new_head);
        return new_head.data.e_object;
    }

    /// Parses `key = value` (or `key: value`) and stores the value in `obj`.
    /// The key rope is allocated from `allocator`; the stored value uses the
    /// parser's own allocator.
    ///
    /// While the value is being scanned `[[`/`]]` must be lexed as separate
    /// brackets (nested arrays), so `allow_double_bracket` is switched off for
    /// the duration and back on afterwards.
    pub fn parseAssignment(p: *TOML, obj: *E.Object, allocator: std.mem.Allocator) anyerror!void {
        p.lexer.allow_double_bracket = false;
        var rope = try p.parseKey(allocator);
        try p.lexer.expectAssignment();

        obj.setRope(rope, p.allocator, try p.parseValue()) catch |err| {
            switch (err) {
                error.Clobber => {
                    try p.lexer.addDefaultError("Cannot redefine key");
                    return error.SyntaxError;
                },
                else => return err,
            }
        };
        p.lexer.allow_double_bracket = true;
    }

    /// Parses one value: boolean, string, bare identifier (kept as a string),
    /// optionally signed number, inline table `{ ... }` or array `[ ... ]`.
    pub fn parseValue(p: *TOML) anyerror!Expr {
        const loc = p.lexer.loc();

        switch (p.lexer.token) {
            .t_false => {
                try p.lexer.next();
                return p.e(E.Boolean{
                    .value = false,
                }, loc);
            },
            .t_true => {
                try p.lexer.next();
                return p.e(E.Boolean{
                    .value = true,
                }, loc);
            },
            .t_string_literal => {
                var str: E.String = p.lexer.toEString();

                try p.lexer.next();
                return p.e(str, loc);
            },
            .t_identifier => {
                var str: E.String = E.String{ .utf8 = p.lexer.identifier };

                try p.lexer.next();
                return p.e(str, loc);
            },
            .t_numeric_literal => {
                const value = p.lexer.number;
                try p.lexer.next();
                return p.e(E.Number{ .value = value }, loc);
            },
            .t_minus => {
                try p.lexer.next();
                // Read the number first; `expect` below validates the token kind
                // and advances past it.
                const value = p.lexer.number;

                try p.lexer.expect(.t_numeric_literal);
                return p.e(E.Number{ .value = -value }, loc);
            },
            .t_plus => {
                try p.lexer.next();
                const value = p.lexer.number;

                try p.lexer.expect(.t_numeric_literal);
                return p.e(E.Number{ .value = value }, loc);
            },
            .t_open_brace => {
                try p.lexer.next();
                var stack = std.heap.stackFallback(@sizeOf(Rope) * 6, p.allocator);
                var key_allocator = stack.get();
                var expr = p.e(E.Object{}, loc);
                var obj = expr.data.e_object;
                // Number of `key = value` entries parsed so far; every entry after
                // the first must be preceded by a comma.
                var entry_count: usize = 0;

                while (p.lexer.token != .t_close_brace) {
                    if (entry_count > 0) {
                        if (!try p.parseMaybeTrailingComma(.t_close_brace)) {
                            break;
                        }
                    }

                    try p.parseAssignment(obj, key_allocator);
                    // `parseAssignment` re-enables double brackets on exit; we are
                    // still inside a value, so turn them off again.
                    p.lexer.allow_double_bracket = false;
                    stack.fixed_buffer_allocator.reset();
                    entry_count += 1;
                }

                try p.lexer.expect(.t_close_brace);
                return expr;
            },
            .t_open_bracket => {
                try p.lexer.next();
                var array_ = p.e(E.Array{}, loc);
                var array = array_.data.e_array;
                const allocator = p.allocator;
                // Restore the caller's setting on exit instead of forcing it on, so
                // that an enclosing array keeps lexing `[[` as two brackets.
                const previous_allow_double_bracket = p.lexer.allow_double_bracket;
                p.lexer.allow_double_bracket = false;

                while (p.lexer.token != .t_close_bracket) {
                    if (array.items.len > 0) {
                        if (!try p.parseMaybeTrailingComma(.t_close_bracket)) {
                            break;
                        }
                    }

                    try array.push(allocator, try p.parseValue());
                }

                try p.lexer.expect(.t_close_bracket);
                p.lexer.allow_double_bracket = previous_allow_double_bracket;
                return array_;
            },
            else => {
                try p.lexer.unexpected();
                return error.SyntaxError;
            },
        }
    }

    /// Consumes a required comma and returns whether another element follows,
    /// i.e. false when the comma was a trailing one directly before `closer`.
    pub fn parseMaybeTrailingComma(p: *TOML, closer: T) !bool {
        try p.lexer.expect(.t_comma);

        return p.lexer.token != closer;
    }
};