diff options
author | 2021-04-19 20:53:54 -0700 | |
---|---|---|
committer | 2021-04-19 20:53:54 -0700 | |
commit | 5ae24b75aec557136058e227a45f3662b7e36f9f (patch) | |
tree | 42d8d6c8211f13d13eef50be43abfa05cf5f4e78 | |
parent | 17df86ca17e9a16d6dfee1767877f25c45e28e91 (diff) | |
download | bun-5ae24b75aec557136058e227a45f3662b7e36f9f.tar.gz bun-5ae24b75aec557136058e227a45f3662b7e36f9f.tar.zst bun-5ae24b75aec557136058e227a45f3662b7e36f9f.zip |
lots
-rw-r--r-- | build.zig | 9 | ||||
-rw-r--r-- | src/bundler.zig | 13 | ||||
-rw-r--r-- | src/flags.zig | 3 | ||||
-rw-r--r-- | src/fs.zig | 60 | ||||
-rw-r--r-- | src/fs_impl.zig | 10 | ||||
-rw-r--r-- | src/fs_impl_native.zig | 3 | ||||
-rw-r--r-- | src/fs_impl_wasm.zig | 0 | ||||
-rw-r--r-- | src/js_ast.zig | 1193 | ||||
-rw-r--r-- | src/js_lexer.zig | 14 | ||||
-rw-r--r-- | src/js_parser.zig | 6 | ||||
-rw-r--r-- | src/logger.zig | 37 | ||||
-rw-r--r-- | src/main.zig | 13 | ||||
-rw-r--r-- | src/main_wasm.zig | 31 | ||||
-rw-r--r-- | src/options.zig | 66 | ||||
-rw-r--r-- | src/string_immutable.zig | 28 | ||||
-rw-r--r-- | src/string_mutable.zig | 126 | ||||
-rw-r--r-- | src/string_types.zig | 2 | ||||
-rw-r--r-- | src/strings.zig | 13 |
18 files changed, 1435 insertions, 192 deletions
@@ -11,9 +11,16 @@ pub fn build(b: *std.build.Builder) void { // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. const mode = b.standardReleaseOptions(); - const exe = b.addExecutable("esdev", "src/main.zig"); + var exe: *std.build.LibExeObjStep = undefined; + if (target.getCpuArch().isWasm()) { + exe = b.addExecutable("esdev", "src/main_wasm.zig"); + } else { + exe = b.addExecutable("esdev", "src/main.zig"); + } + exe.setTarget(target); exe.setBuildMode(mode); + exe.addLibPath("/usr/local/lib"); exe.install(); diff --git a/src/bundler.zig b/src/bundler.zig index b249c266a..6afbd402b 100644 --- a/src/bundler.zig +++ b/src/bundler.zig @@ -1,10 +1,15 @@ const std = @import("std"); const options = @import("options.zig"); +const logger = @import("logger.zig"); +const js_ast = @import("js_ast.zig"); pub const Bundler = struct { - options: options.TransformOptions, + options: options.TransformOptions, + logger: logger.Log, + pub fn init(options: options.TransformOptions, allocator: *std.mem.Allocator) Bundler { + var log = logger.Log{ .msgs = ArrayList(Msg).init(allocator) }; + } - pub fn - -}
\ No newline at end of file + pub fn scan() void {} +}; diff --git a/src/flags.zig b/src/flags.zig new file mode 100644 index 000000000..162f61602 --- /dev/null +++ b/src/flags.zig @@ -0,0 +1,3 @@ +const std = @import("std"); + +pub const isWasm = std.Target.Os.Tag.freestanding == std.Target.current.os.tag; diff --git a/src/fs.zig b/src/fs.zig index 6b4d390cc..afcceeb38 100644 --- a/src/fs.zig +++ b/src/fs.zig @@ -1,8 +1,12 @@ const std = @import("std"); -const strings = @import("strings.zig"); + +usingnamespace @import("strings.zig"); + const alloc = @import("alloc.zig"); const expect = std.testing.expect; +// pub const FilesystemImplementation = @import("fs_impl.zig"); + pub const FileSystem = struct { tree: std.AutoHashMap(FileSystemEntry) }; pub const FileSystemEntry = union(enum) { @@ -10,22 +14,44 @@ pub const FileSystemEntry = union(enum) { directory: Directory, }; -pub const File = struct { path: Path, mtime: ?usize, contents: ?[]u8 }; +pub const File = struct { + path: Path, + mtime: ?usize, + contents: ?string, +}; pub const Directory = struct { path: Path, mtime: ?usize, contents: []FileSystemEntry }; pub const PathName = struct { - base: []u8, - dir: []u8, - ext: []u8, + base: string, + dir: string, + ext: string, + + // For readability, the names of certain automatically-generated symbols are + // derived from the file name. For example, instead of the CommonJS wrapper for + // a file being called something like "require273" it can be called something + // like "require_react" instead. This function generates the part of these + // identifiers that's specific to the file path. It can take both an absolute + // path (OS-specific) and a path in the source code (OS-independent). + // + // Note that these generated names do not at all relate to the correctness of + // the code as far as avoiding symbol name collisions. These names still go + // through the renaming logic that all other symbols go through to avoid name + // collisions. + pub fn nonUniqueNameString(self: *PathName, allocator: *std.mem.Allocator) !string { + if (strings.eql("index", self.base)) { + if (self.dir.len > 0) { + return MutableString.ensureValidIdentifier(PathName.init(self.dir), allocator); + } + } - pub fn init(_path: []const u8, allocator: *std.mem.Allocator) PathName { - // TODO: leak. - var path: []u8 = allocator.alloc(u8, _path.len) catch unreachable; - std.mem.copy(u8, path, _path); + return MutableString.ensureValidIdentifier(self.base, allocator); + } + pub fn init(_path: string) PathName { + var path = _path; var base = path; - var dir = path; var ext = path; + var dir = path; var _i = strings.lastIndexOfChar(path, '/'); while (_i) |i| { @@ -58,13 +84,13 @@ pub const PathName = struct { }; pub const Path = struct { - pretty_path: []const u8, - text: []const u8, - namespace: []const u8, + pretty_path: string, + text: string, + namespace: string, name: PathName, - pub fn init(text: []const u8, allocator: *std.mem.Allocator) Path { - return Path{ .pretty_path = text, .text = text, .namespace = "file", .name = PathName.init(text, allocator) }; + pub fn init(text: string) Path { + return Path{ .pretty_path = text, .text = text, .namespace = "file", .name = PathName.init(text) }; } pub fn isBefore(a: *Path, b: Path) bool { @@ -77,7 +103,9 @@ pub const Path = struct { test "PathName.init" { var file = "/root/directory/file.ext".*; - const res = PathName.init(&file, std.heap.page_allocator); + const res = PathName.init( + &file, + ); std.testing.expectEqualStrings(res.dir, "/root/directory"); std.testing.expectEqualStrings(res.base, "file"); std.testing.expectEqualStrings(res.ext, ".ext"); diff --git a/src/fs_impl.zig b/src/fs_impl.zig new file mode 100644 index 000000000..312d67171 --- /dev/null +++ b/src/fs_impl.zig @@ -0,0 +1,10 @@ +const std = @import("std"); +usingnamespace @import("flags.zig"); + +pub const FS = comptime { + if (isWASM) { + return @import("fs_impl_wasm.zig"); + } else { + return @import("fs_impl_native.zig"); + } +}; diff --git a/src/fs_impl_native.zig b/src/fs_impl_native.zig new file mode 100644 index 000000000..c9fb6bef2 --- /dev/null +++ b/src/fs_impl_native.zig @@ -0,0 +1,3 @@ +const std = @import("std"); + +const fs = std.fs; diff --git a/src/fs_impl_wasm.zig b/src/fs_impl_wasm.zig new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/fs_impl_wasm.zig diff --git a/src/js_ast.zig b/src/js_ast.zig index cb2f2b150..c20439476 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -1,6 +1,12 @@ const std = @import("std"); const logger = @import("logger.zig"); +usingnamespace @import("strings.zig"); + +const ast = @import("import_record.zig"); + +pub const JavascriptStringValue = []const u16; + pub const NodeIndex = u32; pub const NodeIndexNone = 4294967293; @@ -8,10 +14,42 @@ pub const DataIndex = u16; pub const DataIndexNone = 65533; pub const BindingNodeIndex = NodeIndex; -pub const StmtNodeIndex = NodeIndex; -pub const ExprNodeIndex = NodeIndex; +pub const StmtNodeIndex = Stmt; +pub const ExprNodeIndex = Expr; + +// TODO: figure out if we actually need this +// -- original comment -- +// Files are parsed in parallel for speed. We want to allow each parser to +// generate symbol IDs that won't conflict with each other. We also want to be +// able to quickly merge symbol tables from all files into one giant symbol +// table. +// +// We can accomplish both goals by giving each symbol ID two parts: a source +// index that is unique to the parser goroutine, and an inner index that +// increments as the parser generates new symbol IDs. Then a symbol map can +// be an array of arrays indexed first by source index, then by inner index. +// The maps can be merged quickly by creating a single outer array containing +// all inner arrays from all parsed files. +pub const Ref = struct { + source_index: u32 = 0, + inner_index: u32, + + const None = Ref{ .source_index = std.math.maxInt(u32), .inner_index = std.math.maxInt(u32) }; +}; + +pub const ImportItemStatus = enum(u8) { + none, + + // The linker doesn't report import/export mismatch errors + generated, + // The printer will replace this import with "undefined" -pub const Comment = struct { text: []u8 }; + missing, +}; + +pub const LocRef = struct { loc: logger.Loc, ref: ?Ref }; + +pub const Comment = struct { text: string }; pub const FnBody = struct { loc: logger.Loc, @@ -19,83 +57,871 @@ pub const FnBody = struct { }; pub const Fn = struct { - name: NodeIndex = NodeIndexNone, + name: ?LocRef, open_parens_loc: logger.Loc, args: []Arg, body: FnBody, + arguments_ref: ?Ref, - is_async: bool, - is_generator: bool, - has_rest_arg: bool, - has_if_scope: bool, + is_async: bool = false, + is_generator: bool = false, + has_rest_arg: bool = false, + has_if_scope: bool = false, // This is true if the function is a method - is_unique_formal_parameters: bool, + is_unique_formal_parameters: bool = false, }; -pub const BindingType = enum { - b_missing, - b_identifier, - b_array, - b_object, -}; +pub const Binding = struct { + type_: Type = Type.b_missing, + data: B, -pub const Property = struct { - pub const Kind = enum { - normal, - get, - set, - spread, + pub const Type = enum { + b_missing, + b_identifier, + b_array, + b_object, + }; + + pub const Identifier = struct { + ref: Ref, }; - key: NodeIndex, - value: NodeIndex = NodeIndexNone, - initializer: Kind = Kind.normal, - is_computed: bool, - is_method: bool, - is_static: bool, - was_shorthand: bool, + pub const Object = struct { properties: []Property }; + + pub const Property = struct { + pub const Kind = enum { + normal, + get, + set, + spread, + }; + + key: NodeIndex, + value: NodeIndex = NodeIndexNone, + initializer: Kind = Kind.normal, + is_computed: bool = false, + is_method: bool = false, + is_static: bool = false, + was_shorthand: bool = false, + }; + + pub const Array = struct { + binding: B, + }; +}; + +pub const B = union(enum) { + identifier: Binding.Identifier, + array: Binding.Array, + property: Binding.Property, + object: Binding.Object, + missing: Binding.Missing, }; pub const Arg = struct { - ts_decorators: []NodeIndex, - binding: Binding, - default: NodeIndex = NodeIndexNone, + ts_decorators: ?[]ExprNodeIndex, + binding: B, + default: ?ExprNodeIndex, // "constructor(public x: boolean) {}" - is_typescript_ctor_field: bool, + is_typescript_ctor_field: bool = false, }; -pub const Try = struct {}; -pub const Binding = struct {}; - pub const Class = struct { class_keyword: logger.Range, - ts_decorators: []NodeIndex, + ts_decorators: ?[]ExprNodeIndex, name: logger.Loc, - extends: NodeIndex = NodeIndexNone, + extends: ?ExprNodeIndex, body_loc: logger.Loc, - properties: []Property, + properties: ?[]Property, }; +const _Class = Class; -pub const Expr = struct { +pub const ClauseItem = struct { + alias: string, + alias_loc: logger.Loc, + name: LocRef, + + // This is the original name of the symbol stored in "Name". It's needed for + // "SExportClause" statements such as this: + // + // export {foo as bar} from 'path' + // + // In this case both "foo" and "bar" are aliases because it's a re-export. + // We need to preserve both aliases in case the symbol is renamed. In this + // example, "foo" is "OriginalName" and "bar" is "Alias". + original_name: string, +}; + +pub const Decl = struct { + binding: Binding, + value: ?ExprNodeIndex, +}; + +pub const Symbol = struct { + // This is the name that came from the parser. Printed names may be renamed + // during minification or to avoid name collisions. Do not use the original + // name during printing. + original_name: string, + + // This is used for symbols that represent items in the import clause of an + // ES6 import statement. These should always be referenced by EImportIdentifier + // instead of an EIdentifier. When this is present, the expression should + // be printed as a property access off the namespace instead of as a bare + // identifier. + // + // For correctness, this must be stored on the symbol instead of indirectly + // associated with the Ref for the symbol somehow. In ES6 "flat bundling" + // mode, re-exported symbols are collapsed using MergeSymbols() and renamed + // symbols from other files that end up at this symbol must be able to tell + // if it has a namespace alias. + namespace_alias: *NamespaceAlias, + + // Used by the parser for single pass parsing. Symbols that have been merged + // form a linked-list where the last link is the symbol to use. This link is + // an invalid ref if it's the last link. If this isn't invalid, you need to + // FollowSymbols to get the real one. + link: ?Ref, + + // An estimate of the number of uses of this symbol. This is used to detect + // whether a symbol is used or not. For example, TypeScript imports that are + // unused must be removed because they are probably type-only imports. This + // is an estimate and may not be completely accurate due to oversights in the + // code. But it should always be non-zero when the symbol is used. + use_count_estimate: u32 = 0, + + // This is for generating cross-chunk imports and exports for code splitting. + chunk_index: ?u32, + + // This is used for minification. Symbols that are declared in sibling scopes + // can share a name. A good heuristic (from Google Closure Compiler) is to + // assign names to symbols from sibling scopes in declaration order. That way + // local variable names are reused in each global function like this, which + // improves gzip compression: + // + // function x(a, b) { ... } + // function y(a, b, c) { ... } + // + // The parser fills this in for symbols inside nested scopes. There are three + // slot namespaces: regular symbols, label symbols, and private symbols. + nested_scope_slot: ?u32, + + kind: Kind, + + // Certain symbols must not be renamed or minified. For example, the + // "arguments" variable is declared by the runtime for every function. + // Renaming can also break any identifier used inside a "with" statement. + must_not_be_renamed: bool, + + // We automatically generate import items for property accesses off of + // namespace imports. This lets us remove the expensive namespace imports + // while bundling in many cases, replacing them with a cheap import item + // instead: + // + // import * as ns from 'path' + // ns.foo() + // + // That can often be replaced by this, which avoids needing the namespace: + // + // import {foo} from 'path' + // foo() + // + // However, if the import is actually missing then we don't want to report a + // compile-time error like we do for real import items. This status lets us + // avoid this. We also need to be able to replace such import items with + // undefined, which this status is also used for. + import_item_status: ImportItemStatus, + + // Sometimes we lower private symbols even if they are supported. For example, + // consider the following TypeScript code: + // + // class Foo { + // #foo = 123 + // bar = this.#foo + // } + // + // If "useDefineForClassFields: false" is set in "tsconfig.json", then "bar" + // must use assignment semantics instead of define semantics. We can compile + // that to this code: + // + // class Foo { + // constructor() { + // this.#foo = 123; + // this.bar = this.#foo; + // } + // #foo; + // } + // + // However, we can't do the same for static fields: + // + // class Foo { + // static #foo = 123 + // static bar = this.#foo + // } + // + // Compiling these static fields to something like this would be invalid: + // + // class Foo { + // static #foo; + // } + // Foo.#foo = 123; + // Foo.bar = Foo.#foo; + // + // Thus "#foo" must be lowered even though it's supported. Another case is + // when we're converting top-level class declarations to class expressions + // to avoid the TDZ and the class shadowing symbol is referenced within the + // class body: + // + // class Foo { + // static #foo = Foo + // } + // + // This cannot be converted into something like this: + // + // var Foo = class { + // static #foo; + // }; + // Foo.#foo = Foo; + // + private_symbol_must_be_lowered: bool = false, + + pub const Kind = enum { + + // An unbound symbol is one that isn't declared in the file it's referenced + // in. For example, using "window" without declaring it will be unbound. + unbound, + + // This has special merging behavior. You're allowed to re-declare these + // symbols more than once in the same scope. These symbols are also hoisted + // out of the scope they are declared in to the closest containing function + // or module scope. These are the symbols with this kind: + // + // - Function arguments + // - Function statements + // - Variables declared using "var" + // + hoisted, + hoisted_function, + + // There's a weird special case where catch variables declared using a simple + // identifier (i.e. not a binding pattern) block hoisted variables instead of + // becoming an error: + // + // var e = 0; + // try { throw 1 } catch (e) { + // print(e) // 1 + // var e = 2 + // print(e) // 2 + // } + // print(e) // 0 (since the hoisting stops at the catch block boundary) + // + // However, other forms are still a syntax error: + // + // try {} catch (e) { let e } + // try {} catch ({e}) { var e } + // + // This symbol is for handling this weird special case. + catch_identifier, + + // Generator and async functions are not hoisted, but still have special + // properties such as being able to overwrite previous functions with the + // same name + generator_or_async_function, + + // This is the special "arguments" variable inside functions + arguments, + + // Classes can merge with TypeScript namespaces. + class, + + // A class-private identifier (i.e. "#foo"). + private_field, + private_method, + private_get, + private_set, + private_get_set_pair, + private_static_field, + private_static_method, + private_static_get, + private_static_set, + private_static_get_set_pair, + + // Labels are in their own namespace + label, + + // TypeScript enums can merge with TypeScript namespaces and other TypeScript + // enums. + ts_enum, + + // TypeScript namespaces can merge with classes, functions, TypeScript enums, + // and other TypeScript namespaces. + ts_namespace, + + // In TypeScript, imports are allowed to silently collide with symbols within + // the module. Presumably this is because the imports may be type-only. + import, + + // Assigning to a "const" symbol will throw a TypeError at runtime + cconst, + + // This annotates all other symbols that don't have special behavior. + other, + }; + + pub const Use = struct { + count_estimate: u32, + }; + + pub const Map = struct { + // This could be represented as a "map[Ref]Symbol" but a two-level array was + // more efficient in profiles. This appears to be because it doesn't involve + // a hash. This representation also makes it trivial to quickly merge symbol + // maps from multiple files together. Each file only generates symbols in a + // single inner array, so you can join the maps together by just make a + // single outer array containing all of the inner arrays. See the comment on + // "Ref" for more detail. + symbols_for_source: [][]Symbol = undefined, + + pub fn get(self: *Map, ref: Ref) ?Symbol { + self.symbols_for_source[ref.source_index][ref.inner_index]; + } + + pub fn init(sourceCount: usize, allocator: *std.mem.Allocator) !Map { + var symbols_for_source: [][]Symbol = try allocator.alloc([]Symbol, sourceCount); + return Map{ .symbols_for_source = symbols_for_source }; + } + }; + + pub fn isKindPrivate(kind: Symbol.Kind) bool { + return kind >= Symbol.Kind.private_field and kind <= Symbol.Kind.private_static_get_set_pair; + } + + pub fn isKindHoisted(kind: Symbol.Kind) bool { + return kind == Symbol.Kind.hoisted or kind == Symbol.Kind.hoisted_function; + } + + pub fn isKindHoistedOrFunction(kind: Symbol.Kind) bool { + return isKindHoisted(kind) or kind == Symbol.Kind.generator_or_async_function; + } + + pub fn isKindFunction(kind: Symbol.Kind) bool { + return kind == Symbol.Kind.hoisted_function or kind == Symbol.Kind.generator_or_async_function; + } +}; + +pub const OptionalChain = enum { + +// "a?.b" +start, + +// "a?.b.c" => ".c" is OptionalChainContinue +// "(a?.b).c" => ".c" is OptionalChain null +ccontinue }; + +pub const E = struct { pub const Array = struct { items: []ExprNodeIndex, comma_after_spread: logger.Loc, + is_single_line: bool, is_parenthesized: bool, }; pub const Unary = struct { op: Op.Code, + value: Expr, + }; + + pub const Binary = struct { + left: ExprNodeIndex, + right: ExprNodeIndex, + op: Op.Code, + }; + + pub const Boolean = struct { value: bool }; + pub const Super = struct {}; + pub const Null = struct {}; + pub const Undefined = struct {}; + pub const New = struct { + target: ExprNodeIndex, + args: []ExprNodeIndex, + + // True if there is a comment containing "@__PURE__" or "#__PURE__" preceding + // this call expression. See the comment inside ECall for more details. + can_be_unwrapped_if_unused: bool = false, + }; + pub const NewTarget = struct {}; + pub const ImportMeta = struct {}; + + pub const Call = struct { + // Node: + target: ExprNodeIndex, + args: []ExprNodeIndex, + optional_chain: OptionalChain, + is_direct_eval: bool = false, + + // True if there is a comment containing "@__PURE__" or "#__PURE__" preceding + // this call expression. This is an annotation used for tree shaking, and + // means that the call can be removed if it's unused. It does not mean the + // call is pure (e.g. it may still return something different if called twice). + // + // Note that the arguments are not considered to be part of the call. If the + // call itself is removed due to this annotation, the arguments must remain + // if they have side effects. + can_be_unwrapped_if_unused: bool = false, + + pub fn hasSameFlagsAs(a: *Call, b: *Call) bool { + return (a.optional_chain == b.optional_chain and + a.is_direct_eval == b.is_direct_eval and + a.can_be_unwrapped_if_unused == b.can_be_unwrapped_if_unused); + } }; - // TODO: THIS IS WHERE YOU LEFT OFF! - // pub const Binary = {} + pub const Dot = struct { + // target is Node + name: string, + name_loc: logger.Loc, + optional_chain: ?OptionalChain, + + // If true, this property access is known to be free of side-effects. That + // means it can be removed if the resulting value isn't used. + can_be_removed_if_unused: bool = false, + + // If true, this property access is a function that, when called, can be + // unwrapped if the resulting value is unused. Unwrapping means discarding + // the call target but keeping any arguments with side effects. + call_can_be_unwrapped_if_unused: bool = false, + + pub fn hasSameFlagsAs(a: *Dot, b: *Dot) bool { + return (a.optional_chain == b.optional_chain and + a.is_direct_eval == b.is_direct_eval and + a.can_be_unwrapped_if_unused == b.can_be_unwrapped_if_unused and a.call_can_be_unwrapped_if_unused == b.call_can_be_unwrapped_if_unused); + } + }; + + pub const Index = struct { + index: ExprNodeIndex, + optional_chain: ?OptionalChain, + + pub fn hasSameFlagsAs(a: *Index, b: *Index) bool { + return (a.optional_chain == b.optional_chain); + } + }; + + pub const Arrow = struct { + args: []Arg, + body: FnBody, + + is_async: bool = false, + has_rest_arg: bool = false, + prefer_expr: bool = false, // Use shorthand if true and "Body" is a single return statement + }; + + pub const Function = Fn; + pub const Class = _Class; + + pub const Identifier = struct { + ref: Ref = Ref.None, + + // If we're inside a "with" statement, this identifier may be a property + // access. In that case it would be incorrect to remove this identifier since + // the property access may be a getter or setter with side effects. + must_keep_due_to_with_stmt: bool = false, + + // If true, this identifier is known to not have a side effect (i.e. to not + // throw an exception) when referenced. If false, this identifier may or may + // not have side effects when referenced. This is used to allow the removal + // of known globals such as "Object" if they aren't used. + can_be_removed_if_unused: bool = false, + + // If true, this identifier represents a function that, when called, can be + // unwrapped if the resulting value is unused. Unwrapping means discarding + // the call target but keeping any arguments with side effects. + call_can_be_unwrapped_if_unused: bool = false, + }; + + // This is similar to an EIdentifier but it represents a reference to an ES6 + // import item. + // + // Depending on how the code is linked, the file containing this EImportIdentifier + // may or may not be in the same module group as the file it was imported from. + // + // If it's the same module group than we can just merge the import item symbol + // with the corresponding symbol that was imported, effectively renaming them + // to be the same thing and statically binding them together. + // + // But if it's a different module group, then the import must be dynamically + // evaluated using a property access off the corresponding namespace symbol, + // which represents the result of a require() call. + // + // It's stored as a separate type so it's not easy to confuse with a plain + // identifier. For example, it'd be bad if code trying to convert "{x: x}" into + // "{x}" shorthand syntax wasn't aware that the "x" in this case is actually + // "{x: importedNamespace.x}". This separate type forces code to opt-in to + // doing this instead of opt-out. + pub const ImportIdentifier = struct { + ref: Ref, + + // If true, this was originally an identifier expression such as "foo". If + // false, this could potentially have been a member access expression such + // as "ns.foo" off of an imported namespace object. + was_originally_identifier: bool = false, + }; + + // This is similar to EIdentifier but it represents class-private fields and + // methods. It can be used where computed properties can be used, such as + // EIndex and Property. + pub const PrivateIdentifier = struct { + ref: Ref, + }; + + pub const JSXElement = struct { + tag: ?ExprNodeIndex, + properties: []Property, + children: []Expr, + }; + + pub const Missing = struct {}; + + pub const Number = struct { value: f64 }; + + pub const BigInt = struct { + value: string, + }; + + pub const Object = struct { + properties: []Property, + comma_after_spread: logger.Loc, + is_single_line: bool, + is_parenthesized: bool, + }; + + pub const Spread = struct { value: Expr }; + + pub const String = struct { + value: JavascriptStringValue, + legacy_octal_loc: logger.Loc, + prefer_template: bool, + }; + + // value is in the Node + pub const TemplatePart = struct { + value: Expr, + tail_loc: logger.Loc, + tail: JavascriptStringValue, + tail_raw: string, + }; + + pub const Template = struct { tag: ?ExprNodeIndex, head: JavascriptStringValue, head_raw: string, // This is only filled out for tagged template literals + parts: ?[]TemplatePart, legacy_octal_loc: logger.Loc }; + + pub const RegExp = struct { + value: string, + }; + + pub const Await = struct { value: Expr }; + + pub const Yield = struct { + value: ?Expr, + is_star: bool, + }; + + pub const If = struct { + test_: Expr, + yes: Expr, + no: Expr, + }; + + pub const RequireOrRequireResolve = struct { + import_record_index: u32, + }; + + pub const Import = struct { + expr: Expr, + import_record_index: u32, + + // Comments inside "import()" expressions have special meaning for Webpack. + // Preserving comments inside these expressions makes it possible to use + // esbuild as a TypeScript-to-JavaScript frontend for Webpack to improve + // performance. We intentionally do not interpret these comments in esbuild + // because esbuild is not Webpack. But we do preserve them since doing so is + // harmless, easy to maintain, and useful to people. See the Webpack docs for + // more info: https://webpack.js.org/api/module-methods/#magic-comments. + leading_interior_comments: []Comment, + }; +}; + +pub const Stmt = struct { + loc: logger.Loc, + data: Data, + + const Data = union(enum) { + s_block: S.Block, + s_comment: S.Comment, + s_directive: S.Directive, + s_export_clause: S.ExportClause, + s_empty: S.Empty, + s_type_script: S.TypeScript, + s_debugger: S.Debugger, + s_export_from: S.ExportFrom, + s_export_default: S.ExportDefault, + s_enum: S.Enum, + s_namespace: S.Namespace, + s_function: S.Function, + s_class: S.Class, + s_if: S.If, + s_for: S.For, + s_for_in: S.ForIn, + s_for_of: S.ForOf, + s_do_while: S.DoWhile, + s_while: S.While, + s_with: S.With, + s_try: S.Try, + s_switch: S.Switch, + s_import: S.Import, + s_return: S.Return, + s_throw: S.Throw, + s_local: S.Local, + s_break: S.Break, + s_continue: S.Continue, + }; +}; + +pub const Expr = struct { + loc: logger.Loc, + data: Data, + + pub const Data = union(enum) { + e_array: E.Array, + e_unary: E.Unary, + e_binary: E.Binary, + e_boolean: E.Boolean, + e_super: E.Super, + e_null: E.Null, + e_undefined: E.Undefined, + e_new: E.New, + e_new_target: E.NewTarget, + e_import_meta: E.ImportMeta, + e_call: E.Call, + e_dot: E.Dot, + e_index: E.Index, + e_arrow: E.Arrow, + e_identifier: E.Identifier, + e_import_identifier: E.ImportIdentifier, + e_private_identifier: E.PrivateIdentifier, + e_jsx_element: E.JSXElement, + e_missing: E.Missing, + e_number: E.Number, + e_big_int: E.BigInt, + e_object: E.Object, + e_spread: E.Spread, + e_string: E.String, + e_template_part: E.TemplatePart, + e_template: E.Template, + e_reg_exp: E.RegExp, + e_await: E.Await, + e_yield: E.Yield, + e_if: E.If, + e_require_or_require_resolve: E.RequireOrRequireResolve, + e_import: E.Import, + + pub fn isOptionalChain(self: *Expr) bool { + return switch (self) { + Expr.e_dot => |dot| dot.optional_chain != null, + Expr.e_index => |dot| dot.optional_chain != null, + Expr.e_call => |dot| dot.optional_chain != null, + else => false, + }; + } + + pub fn isBooleanValue(self: *Expr) bool { + // TODO: + return false; + // return switch (self) { + // Expr.e_boolean => |dot| true, + // Expr.e_if => |dot| dot.optional_chain != OptionalChain.none, + // Expr.e_call => |dot| dot.optional_chain != OptionalChain.none, + // else => false, + // }; + } + + pub fn isNumericValue(self: *Expr) bool { + // TODO: + + return false; + } + + pub fn isStringValue(self: *Expr) bool { + // TODO: + return false; + } + }; +}; + +pub const EnumValue = struct { + loc: logger.Loc, + ref: Ref, + name: []u16, + value: ?ExprNodeIndex, +}; + +pub const S = struct { + pub const Block = struct { stmts: []StmtNodeIndex }; + + pub const Comment = struct { text: string }; + + pub const Directive = struct { value: JavascriptStringValue, legacy_octal_loc: logger.Loc }; + + pub const ExportClause = struct { items: []ClauseItem }; + + pub const Empty = struct {}; + + // This is a stand-in for a TypeScript type declaration + pub const TypeScript = struct {}; + + pub const Debugger = struct {}; + + pub const ExportFrom = struct { + items: []ClauseItem, + namespace_ref: Ref, + import_record_index: u32, + is_single_line: bool, + }; + + pub const ExportDefault = struct { + default_name: LocRef, // value may be a SFunction or SClass + }; + + pub const Enum = struct { + name: LocRef, + arg: Ref, + values: []EnumValue, + is_export: bool, + }; + + pub const Namespace = struct { + name: LocRef, + arg: Ref, + stmts: []StmtNodeIndex, + is_export: bool, + }; + + pub const Function = struct { + func: Fn, + is_export: bool, + }; + + pub const Class = struct { + class: _Class, + is_export: bool, + }; + + pub const If = struct { + test_: ExprNodeIndex, + yes: StmtNodeIndex, + no: StmtNodeIndex = NodeIndexNone, + }; + + pub const For = struct { + // May be a SConst, SLet, SVar, or SExpr + init: StmtNodeIndex, test_: ?ExprNodeIndex, update: ?ExprNodeIndex, body: StmtNodeIndex }; + + pub const ForIn = struct { + // May be a SConst, SLet, SVar, or SExpr + init: StmtNodeIndex, value: ExprNodeIndex, body: StmtNodeIndex }; + + pub const ForOf = struct { is_await: bool, + // May be a SConst, SLet, SVar, or SExpr + init: StmtNodeIndex, value: ExprNodeIndex, body: StmtNodeIndex }; + + pub const DoWhile = struct { body: StmtNodeIndex, test_: ExprNodeIndex }; + + pub const While = struct { + test_: ExprNodeIndex, + body: StmtNodeIndex, + }; + + pub const With = struct { + value: ExprNodeIndex, + body: StmtNodeIndex, + body_loc: logger.Log, + }; + + pub const Try = struct { + body: []StmtNodeIndex, + body_loc: logger.Log, + catch_: ?Catch, + finally: ?Finally, + }; + + pub const Switch = struct { + test_: ExprNodeIndex, + body_loc: logger.Loc, + cases: []Case, + }; + + // This object represents all of these types of import statements: + // + // import 'path' + // import {item1, item2} from 'path' + // import * as ns from 'path' + // import defaultItem, {item1, item2} from 'path' + // import defaultItem, * as ns from 'path' + // + // Many parts are optional and can be combined in different ways. The only + // restriction is that you cannot have both a clause and a star namespace. + pub const Import = struct { + // If this is a star import: This is a Ref for the namespace symbol. The Loc + // for the symbol is StarLoc. + // + // Otherwise: This is an auto-generated Ref for the namespace representing + // the imported file. In this case StarLoc is nil. The NamespaceRef is used + // when converting this module to a CommonJS module. + namespace_ref: Ref, default_name: *LocRef, items: *[]ClauseItem, star_name_loc: *logger.Loc, import_record_index: uint32, is_single_line: bool }; + + pub const Return = struct {}; + pub const Throw = struct {}; + + pub const Local = struct { + kind: Kind = Kind.k_var, + decls: []Decl, + is_export: bool = false, + // The TypeScript compiler doesn't generate code for "import foo = bar" + // statements where the import is never used. + was_ts_import_equals: bool = false, + + pub const Kind = enum { + k_var, + k_let, + k_const, + }; + }; + + pub const Break = struct { + label: *LocRef, + }; + + pub const Continue = struct { + label: *LocRef, + }; +}; + +pub const Catch = struct { + loc: logger.Loc, + binding: *B, + body: []StmtNodeIndex, }; +pub const Finally = struct { + loc: logger.Loc, + stmts: []StmtNodeIndex, +}; + +pub const Case = struct { loc: logger.Loc, value: ?ExprNodeIndex, body: []StmtNodeIndex }; + pub const Op = struct { // If you add a new token, remember to add it to "OpTable" too - const Code = enum { + pub const Code = enum { // Prefix un_pos, un_neg, @@ -162,7 +988,7 @@ pub const Op = struct { bin_logical_and_assign, }; - const Level = enum { + pub const Level = enum { lowest, comma, spread, @@ -188,7 +1014,7 @@ pub const Op = struct { member, }; - text: string, + text: []const u8, level: Level, is_keyword: bool, @@ -261,100 +1087,54 @@ pub const Op = struct { }; pub const ArrayBinding = struct { - binding: BindingNodeIndex, - default_value: ExprNodeIndex = NodeIndexNone, + binding: Binding, + default_value: ?ExprNodeIndex, }; -pub const Node = struct { - pub const Tag = enum { - s_block, - s_comment, - s_debugger, - s_directive, - s_empty, - s_type_script, - s_export_clause, - s_export_from, - s_export_default, - s_export_star, - s_export_equals, - s_lazy_export, - s_expr, - s_enum, - s_namespace, - s_function, - s_class, - s_label, - s_if, - s_for, - s_for_in, - s_for_of, - s_do_while, - s_while, - s_with, - s_try, - s_switch, - s_import, - s_return, - s_throw, - s_local, - s_break, - s_continue, - - e_array, - e_unary, - e_binary, - e_boolean, - e_super, - e_null, - e_undefined, - e_this, - e_new, - e_new_target, - e_import_meta, - e_call, - e_dot, - e_index, - e_arrow, - e_function, - e_class, - e_identifier, - e_import_identifier, - e_private_identifier, - ejsx_element, - e_missing, - e_number, - e_big_int, - e_object, - e_spread, - e_string, - e_template, - e_reg_exp, - e_await, - e_yield, - e_if, - e_require, - e_require_resolve, - e_import, - }; - - // Source code location of the AST node. - loc: logger.Loc, - // this is relatively common. - is_single_line: bool, +pub const Ast = struct { + approximate_line_count: i32 = 0, + has_lazy_export = false, - // - child: NodeIndex = NodeIndexNone, - extra_data: ?[]NodeIndex, - data_index: u16, -}; + // This is a list of CommonJS features. When a file uses CommonJS features, + // it's not a candidate for "flat bundling" and must be wrapped in its own + // closure. + has_top_level_return: bool = false, + uses_exports_ref: bool = false, + uses_module_ref: bool = false, + exports_kind: ExportsKind = ExportsKind.none, + + // This is a list of ES6 features. They are ranges instead of booleans so + // that they can be used in log messages. Check to see if "Len > 0". + import_keyword: logger.Range = logger.Range.Empty, // Does not include TypeScript-specific syntax or "import()" + export_keyword: logger.Range = logger.Range.Empty, // Does not include TypeScript-specific syntax + top_level_await_keyword: logger.Range = logger.Range.Empty, + + // These are stored at the AST level instead of on individual AST nodes so + // they can be manipulated efficiently without a full AST traversal + import_records: []ast.ImportRecord, + + hashbang: ?string, + directive: ?string, + url_for_css: ?string, + parts: std.ArrayList([]Part), + symbols: std.ArrayList([]Symbol), + module_scope: ?Scope, + // char_freq: *CharFreq, + exports_ref: ?Ref, + module_ref: ?Ref, + wrapper_ref: ?Ref, -pub const AST = struct { - node_tags: std.ArrayList(Node.Tag), + // These are used when bundling. They are filled in during the parser pass + // since we already have to traverse the AST then anyway and the parser pass + // is conveniently fully parallelized. + named_imports: std.AutoHashMap(Ref, NamedImport), + named_exports: std.AutoHashMap(string, NamedExport), + top_level_symbol_to_parts: std.AutoHashMap(Ref, []u32), + export_star_import_records: std.ArrayList([]u32), }; pub const Span = struct { - text: []u8, + text: string, range: logger.Range, }; @@ -388,3 +1168,154 @@ esm_with_dyn }; pub fn isDynamicExport(exp: ExportsKind) bool { return kind == .cjs || kind == .esm_with_dyn; } + +pub const DeclaredSymbol = struct { + ref: Ref, + is_top_level: bool = false, +}; + +pub const Dependency = struct { + source_index: u32 = 0, + part_index: u32 = 0, +}; + +// Each file is made up of multiple parts, and each part consists of one or +// more top-level statements. Parts are used for tree shaking and code +// splitting analysis. Individual parts of a file can be discarded by tree +// shaking and can be assigned to separate chunks (i.e. output files) by code +// splitting. +pub const Part = struct { + stmts: []StmtNodeIndex, + scopes: []*Scope, + + // Each is an index into the file-level import record list + import_record_indices: std.ArrayList(u32), + + // All symbols that are declared in this part. Note that a given symbol may + // have multiple declarations, and so may end up being declared in multiple + // parts (e.g. multiple "var" declarations with the same name). Also note + // that this list isn't deduplicated and may contain duplicates. + declared_symbols: std.ArrayList(DeclaredSymbol), + + // An estimate of the number of uses of all symbols used within this part. + symbol_uses: std.AutoHashMap(Ref, Symbol.Use), + + // The indices of the other parts in this file that are needed if this part + // is needed. + dependencies: std.ArrayList(Dependency), + + // If true, this part can be removed if none of the declared symbols are + // used. If the file containing this part is imported, then all parts that + // don't have this flag enabled must be included. + can_be_removed_if_unused: bool = false, + + // This is used for generated parts that we don't want to be present if they + // aren't needed. This enables tree shaking for these parts even if global + // tree shaking isn't enabled. + force_tree_shaking: bool = false, + + // This is true if this file has been marked as live by the tree shaking + // algorithm. + is_live: bool = false, +}; + +pub const StmtOrExpr = union(enum) { + stmt: Stmt, + expr: Expr, +}; + +pub const NamedImport = struct { + // Parts within this file that use this import + local_parts_with_uses: ?[]u32, + + alias: ?string, + alias_loc: ?logger.Loc, + namespace_ref: ?Ref, + import_record_index: u32, + + // If true, the alias refers to the entire export namespace object of a + // module. This is no longer represented as an alias called "*" because of + // the upcoming "Arbitrary module namespace identifier names" feature: + // https://github.com/tc39/ecma262/pull/2154 + alias_is_star: bool = false, + + // It's useful to flag exported imports because if they are in a TypeScript + // file, we can't tell if they are a type or a value. + is_exported: bool = false, +}; + +pub const NamedExport = struct { + ref: Ref, + alias_loc: logger.Loc, +}; + +pub const StrictModeKind = enum { + sloppy_mode, + explicit_strict_mode, + implicit_strict_mode_import, + implicit_strict_mode_export, + implicit_strict_mode_top_level_await, + implicit_strict_mode_class, +}; + +pub const Scope = struct { + kind: Kind = Kind.block, + parent: ?Scope, + children: []*Scope, + members: std.AutoHashMap(string, Member), + generated: ?[]Ref, + + // This is used to store the ref of the label symbol for ScopeLabel scopes. + label_ref: ?Ref, + label_stmt_is_loop: bool = false, + + // If a scope contains a direct eval() expression, then none of the symbols + // inside that scope can be renamed. We conservatively assume that the + // evaluated code might reference anything that it has access to. + contains_direct_eval: bool = false, + + // This is to help forbid "arguments" inside class body scopes + forbid_arguments: bool = false, + + strict_mode: StrictModeKind = StrictModeKind.explicit_strict_mode, + + pub const Member = struct { ref: Ref, loc: logger.Loc }; + pub const Kind = enum(u8) { + block, + with, + label, + class_name, + class_body, + + // The scopes below stop hoisted variables from extending into parent scopes + entry, // This is a module, TypeScript enum, or TypeScript namespace + function_args, + function_body, + }; +}; + + +pub fn ensureValidIdentifier(base: string, allocator: *std.mem.Allocator) string { + // Convert it to an ASCII identifier. Note: If you change this to a non-ASCII + // identifier, you're going to potentially cause trouble with non-BMP code + // points in target environments that don't support bracketed Unicode escapes. + var needsGap = false; + var str = MutableString.initCopy(allocator: *std.mem.Allocator, str: anytype) + for (base) |c| { + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (len(bytes) > 0 && c >= '0' && c <= '9') { + if needsGap { + bytes = append(bytes, '_') + needsGap = false + } + bytes = append(bytes, byte(c)) + } else if len(bytes) > 0 { + needsGap = true + } + } + + // Make sure the name isn't empty + if len(bytes) == 0 { + return "_" + } + return string(bytes) +}
\ No newline at end of file diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 586806e8f..3606e4eca 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -4,6 +4,8 @@ const tables = @import("js_lexer_tables.zig"); const alloc = @import("alloc.zig"); const build_options = @import("build_options"); +usingnamespace @import("strings.zig"); + const _f = @import("./test/fixtures.zig"); const unicode = std.unicode; @@ -18,8 +20,6 @@ pub const jsxEntity = tables.jsxEntity; // TODO: JSON const IS_JSON_FILE = false; -const string = []const u8; - pub const Lexer = struct { // pub const Error = error{ // UnexpectedToken, @@ -1158,7 +1158,10 @@ fn test_lexer(contents: []const u8) Lexer { .msgs = msgs, }; - const source = logger.Source.initPathString("index.js", contents, std.heap.page_allocator); + const source = logger.Source.initPathString( + "index.js", + contents, + ); return Lexer.init(log, source, alloc.dynamic) catch unreachable; } @@ -1208,7 +1211,10 @@ test "Lexer.step()" { }; defer std.testing.allocator.free(msgs.items); - const source = logger.Source.initPathString("index.js", "for (let i = 0; i < 100; i++) { console.log('hi'); }", std.heap.page_allocator); + const source = logger.Source.initPathString( + "index.js", + "for (let i = 0; i < 100; i++) { console.log('hi'); }", + ); var lex = try Lexer.init(log, source, std.testing.allocator); std.testing.expect('f' == lex.code_point); diff --git a/src/js_parser.zig b/src/js_parser.zig index 8c9ff1e1a..877216bc5 100644 --- a/src/js_parser.zig +++ b/src/js_parser.zig @@ -2,9 +2,3 @@ const std = @import("std"); const logger = @import("logger.zig"); const lexer = @import("lexer.zig"); const ast = @import("js_ast.zig"); - -pub fn Parse( - log: logger.Log, - source: logger.Source, - -)
\ No newline at end of file diff --git a/src/logger.zig b/src/logger.zig index a69a8dfa0..cd27811fb 100644 --- a/src/logger.zig +++ b/src/logger.zig @@ -1,5 +1,7 @@ const std = @import("std"); -const strings = @import("strings.zig"); + +usingnamespace @import("strings.zig"); + const fs = @import("fs.zig"); const unicode = std.unicode; @@ -13,7 +15,7 @@ pub const Kind = enum { note, debug, - pub fn string(self: Kind) []const u8 { + pub fn string(self: Kind) string { return switch (self) { .err => "error", .warn => "warn", @@ -26,13 +28,13 @@ pub const Kind = enum { pub const Loc = i32; pub const Location = struct { - file: []const u8, - namespace: []const u8 = "file", + file: string, + namespace: string = "file", line: i32 = 1, // 1-based column: i32 = 0, // 0-based, in bytes length: usize = 0, // in bytes - line_text: ?[]const u8 = null, - suggestion: ?[]const u8 = null, + line_text: ?string = null, + suggestion: ?string = null, pub fn init(file: []u8, namespace: []u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location { return Location{ @@ -62,7 +64,7 @@ pub const Location = struct { } } - pub fn init_file(file: []const u8, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location { + pub fn init_file(file: string, line: i32, column: i32, length: u32, line_text: ?[]u8, suggestion: ?[]u8) Location { var namespace = "file".*; return Location{ @@ -84,7 +86,11 @@ pub const Msg = struct { data: Data, }; -pub const Range = struct { loc: Loc = 0, len: i32 = 0 }; +pub const Range = struct { + loc: Loc = 0, + len: i32 = 0, + const Empty = Range{ .loc = 0, .len = 0 }; +}; pub const Log = struct { debug: bool = false, @@ -178,17 +184,22 @@ pub fn usize2Loc(loc: usize) Loc { pub const Source = struct { path: fs.Path, index: u32 = 0, - contents: []const u8, + contents: string, // An identifier that is mixed in to automatically-generated symbol names to // improve readability. For example, if the identifier is "util" then the // symbol for an "export default" statement will be called "util_default". - identifier_name: []u8, + identifier_name: string, pub const ErrorPosition = struct { line_start: usize, line_end: usize, column_count: usize, line_count: usize }; - pub fn initPathString(pathString: []const u8, contents: []const u8, allocator: *std.mem.Allocator) Source { - const path = fs.Path.init(pathString, allocator); + pub fn initFile(file: fs.File, allocator: *std.mem.Allocator) Source { + std.debug.assert(file.contents != null); + return Source{ .path = path, .identifier_name = file.path.name.nonUniqueNameString(allocator) catch unreachable, .contents = file.contents }; + } + + pub fn initPathString(pathString: string, contents: string) Source { + const path = fs.Path.init(pathString); return Source{ .path = path, .identifier_name = path.name.base, .contents = contents }; } @@ -256,7 +267,7 @@ pub const Source = struct { } }; -pub fn rangeData(source: ?Source, r: Range, text: []u8) Data { +fn rangeData(source: ?Source, r: Range, text: []u8) Data { return Data{ .text = text, .location = Location.init_or_nil(source, r) }; } diff --git a/src/main.zig b/src/main.zig index b8ee34f6e..e3619f8fd 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,7 +1,18 @@ const std = @import("std"); const lex = @import("js_lexer.zig"); +const alloc = @import("alloc.zig"); pub fn main() anyerror!void { - std.log.info("All your codebase are belong to us. {s}", .{lex.Keywords.get("hey")}); + const args = try std.process.argsAlloc(alloc.dynamic); + const stdout = std.io.getStdOut(); + const stderr = std.io.getStdErr(); + if (args.len < 1) { + const len = stderr.write("Pass a file path"); + return; + } + + // const path = try std.fs.realpathAlloc(alloc.dynamic, args[args.len - 1]); + // const file = try std.fs.openFileAbsolute(path, std.fs.File.OpenFlags{}); + // const bytes = try file.readToEndAlloc(alloc.dynamic, std.math.maxInt(usize)); } diff --git a/src/main_wasm.zig b/src/main_wasm.zig new file mode 100644 index 000000000..2adcbdce5 --- /dev/null +++ b/src/main_wasm.zig @@ -0,0 +1,31 @@ +const std = @import("std"); +const lex = @import("js_lexer.zig"); +const logger = @import("logger.zig"); +const alloc = @import("alloc.zig"); + +pub fn main() anyerror!void { + try alloc.setup(std.heap.page_allocator); + // const args = try std.process.argsAlloc(alloc.dynamic); + // // const stdout = std.io.getStdOut(); + // // const stderr = std.io.getStdErr(); + + // // if (args.len < 1) { + // // const len = stderr.write("Pass a file"); + // // return; + // // } + + // // alloc + const msgs = std.ArrayList(logger.Msg).init(alloc.dynamic); + const log = logger.Log{ + .msgs = msgs, + }; + + const source = logger.Source.initPathString("index.js", "for (let i = 0; i < 100; i++) { console.log('hi') aposkdpoaskdpokasdpokasdpokasdpokasdpoaksdpoaksdpoaskdpoaksdpoaksdpoaskdpoaskdpoasdk; }", alloc.dynamic); + + var lexer = try lex.Lexer.init(log, source, alloc.dynamic); + lexer.next(); + while (lexer.token != lex.T.t_end_of_file) { + lexer.next(); + } + const v = try std.io.getStdOut().write("Finished"); +} diff --git a/src/options.zig b/src/options.zig index 3ffd24ab6..f49a42b51 100644 --- a/src/options.zig +++ b/src/options.zig @@ -1,26 +1,74 @@ const std = @import("std"); const log = @import("logger.zig"); +const fs = @import("fs.zig"); + +usingnamespace @import("strings.zig"); + +const assert = std.debug.assert; pub const Loader = enum { jsx, js, ts, tsx, + css, + file, }; +pub const defaultLoaders = std.ComptimeStringMap(Loader, .{ + .{ ".jsx", Loader.jsx }, + .{ ".js", Loader.js }, + .{ ".mjs", Loader.js }, + .{ ".css", Loader.css }, + .{ ".ts", Loader.ts }, + .{ ".tsx", Loader.tsx }, +}); + pub const TransformOptions = struct { - footer: []u8 = "", - banner: []u8 = "", - define: std.StringHashMap([]u8), + footer: []const u8 = "", + banner: []const u8 = "", + define: std.StringHashMap(string), loader: Loader = Loader.tsx, - resolve_dir: []u8 = "/", + resolve_dir: []const u8 = "/", react_fast_refresh: bool = false, - jsx_factory: []u8 = "React.createElement", - jsx_pragma: []u8 = "jsx", - inject: [][]u8, - public_url: []u8, + jsx_factory: []const u8 = "React.createElement", + jsx_pragma: []const u8 = "jsx", + inject: ?[][]const u8 = null, + public_url: []const u8 = "/", filesystem_cache: std.StringHashMap(fs.File), - entry_point: fs.File, + entry_point: *fs.File, + + pub fn initUncached(allocator: *std.mem.Allocator, entryPointName: string, code: string) !TransformOptions { + assert(entryPointName.len > 0); + + const filesystemCache = std.StringHashMap(string).init(allocator); + + var entryPoint = !allocator.Create(fs.file); + entryPoint.path = fs.Path.init(entryPointName, allocator); + entryPoint.contents = code; + + const define = std.StringHashMap(string).init(allocator); + try define.ensureCapacity(1); + + define.putAssumeCapacity("process.env.NODE_ENV", "development"); + + var loader = Loader.file; + if (defaultLoaders.get(entryPoint.path.name.ext)) |defaultLoader| { + loader = defaultLoader; + } + + assert(loader != .file); + assert(code.len > 0); + try filesystemCache.put(entryPointName, entryPoint); + + return TransformOptions{ + .entry_point = entryPoint, + .define = define, + .loader = loader, + .filesystem_cache = filesystemCache, + .resolve_dir = entryPoint.path.name.dir, + }; + } }; pub const OutputFile = struct { diff --git a/src/string_immutable.zig b/src/string_immutable.zig new file mode 100644 index 000000000..7613509c0 --- /dev/null +++ b/src/string_immutable.zig @@ -0,0 +1,28 @@ +const std = @import("std"); +const expect = std.testing.expect; + +usingnamespace @import("string_types.zig"); + +pub fn containsChar(self: string, char: u8) bool { + return std.mem(char) != null; +} + +pub fn indexOfChar(self: string, char: u8) ?usize { + return std.mem.indexOfScalar(@TypeOf(char), self, char); +} + +pub fn lastIndexOfChar(self: string, char: u8) ?usize { + return std.mem.lastIndexOfScalar(u8, self, char); +} + +pub fn lastIndexOf(self: string, str: u8) ?usize { + return std.mem.lastIndexOf(u8, self, str); +} + +pub fn indexOf(self: string, str: u8) ?usize { + return std.mem.indexOf(u8, self, str); +} + +pub fn eql(self: string, other: anytype) bool { + return std.mem.eql(u8, self, other); +} diff --git a/src/string_mutable.zig b/src/string_mutable.zig new file mode 100644 index 000000000..d846bf312 --- /dev/null +++ b/src/string_mutable.zig @@ -0,0 +1,126 @@ +const std = @import("std"); +const expect = std.testing.expect; + +usingnamespace @import("string_types.zig"); + +pub const MutableString = struct { + allocator: *std.mem.Allocator, + list: std.ArrayListUnmanaged(u8), + + pub fn init(allocator: *std.mem.Allocator, capacity: usize) !MutableString { + return MutableString{ .allocator = allocator, .list = try std.ArrayListUnmanaged(u8).initCapacity(allocator, capacity) }; + } + + pub fn initCopy(allocator: *std.mem.Allocator, str: anytype) !MutableString { + var mutable = try MutableString.init(allocator, std.mem.len(str)); + try mutable.copy(str); + return mutable; + } + + // Convert it to an ASCII identifier. Note: If you change this to a non-ASCII + // identifier, you're going to potentially cause trouble with non-BMP code + // points in target environments that don't support bracketed Unicode escapes. + + pub fn ensureValidIdentifier(str: string, allocator: *std.mem.Allocator) !string { + if (str.len == 0) { + return "_"; + } + + var mutable = try MutableString.init(allocator, 0); + + var needsGap = false; + for (str) |c| { + if (std.ascii.isLower(c) or std.ascii.isUpper(c) or (mutable.len() > 0 and std.ascii.isAlNum(c))) { + if (needsGap) { + try mutable.appendChar('_'); + needsGap = false; + } + try mutable.appendChar(c); + } else if (!needsGap) { + needsGap = true; + } + } + + if (mutable.len() > 0) { + return mutable.list.toOwnedSlice(allocator); + } else { + return str; + } + } + + pub fn len(self: *MutableString) usize { + return self.list.items.len; + } + + pub fn copy(self: *MutableString, str: anytype) !void { + try self.list.ensureCapacity(self.allocator, std.mem.len(str[0..])); + + if (self.list.items.len == 0) { + try self.list.insertSlice(self.allocator, 0, str); + } else { + try self.list.replaceRange(self.allocator, 0, std.mem.len(str[0..]), str[0..]); + } + } + + pub fn deinit(self: *MutableString) !void { + self.list.deinit(self.allocator); + } + + pub fn appendChar(self: *MutableString, char: u8) !void { + try self.list.append(self.allocator, char); + } + + pub fn appendCharAssumeCapacity(self: *MutableString, char: u8) !void { + try self.list.appendAssumeCapacity(self.allocator, char); + } + + pub fn append(self: *MutableString, char: []const u8) !void { + try self.list.appendSlice(self.allocator, char); + } + + pub fn appendAssumeCapacity(self: *MutableString, char: []const u8) !void { + try self.list.appendSliceAssumeCapacity(self.allocator, char); + } + + // pub fn deleteAt(self: *MutableString, i: usize) { + // self.list.swapRemove(i); + // } + + pub fn containsChar(self: *MutableString, char: u8) bool { + return self.indexOfChar(char) != null; + } + + pub fn indexOfChar(self: *MutableString, char: u8) ?usize { + return std.mem.indexOfScalar(@TypeOf(char), self.list.items, char); + } + + pub fn lastIndexOfChar(self: *MutableString, char: u8) ?usize { + return std.mem.lastIndexOfScalar(@TypeOf(char), self.list.items, char); + } + + pub fn lastIndexOf(self: *MutableString, str: u8) ?usize { + return std.mem.lastIndexOf(u8, self.list.items, str); + } + + pub fn indexOf(self: *MutableString, str: u8) ?usize { + return std.mem.indexOf(u8, self.list.items, str); + } + + pub fn eql(self: *MutableString, other: anytype) bool { + return std.mem.eql(u8, self.list.items, other); + } +}; + +test "MutableString" { + const alloc = std.heap.page_allocator; + + var str = try MutableString.initCopy(alloc, "hello"); + expect(str.eql("hello")); +} + +test "MutableString.ensureValidIdentifier" { + const alloc = std.heap.page_allocator; + + std.testing.expectEqualStrings("jquery", try MutableString.ensureValidIdentifier("jquery", alloc)); + std.testing.expectEqualStrings("jquery_foo", try MutableString.ensureValidIdentifier("jquery😋foo", alloc)); +} diff --git a/src/string_types.zig b/src/string_types.zig new file mode 100644 index 000000000..1dc6b211a --- /dev/null +++ b/src/string_types.zig @@ -0,0 +1,2 @@ +pub const string = []const u8; +pub const stringMutable = []u8; diff --git a/src/strings.zig b/src/strings.zig index 02ec4180b..2565acc1f 100644 --- a/src/strings.zig +++ b/src/strings.zig @@ -1,8 +1,7 @@ -const std = @import("std"); -pub fn indexOfChar(contents: []u8, char: u8) callconv(.Inline) ?usize { - return std.mem.indexOfScalar(u8, contents, char); -} +const mutable = @import("string_mutable.zig"); -pub fn lastIndexOfChar(contents: []u8, char: u8) callconv(.Inline) ?usize { - return std.mem.lastIndexOfScalar(u8, contents, char); -} +pub usingnamespace @import("string_types.zig"); + +pub const strings = @import("string_immutable.zig"); + +pub const MutableString = mutable.MutableString; |