author     2023-02-05 02:26:20 -0800
committer  2023-02-05 02:26:20 -0800
commit     f913a468c2d9d57e6e21ed9f5b07d6cce12cc09b
tree       c058ac9f09f58bed54997dad1753d77a18dcde83
parent     bd4d8bdb6a7fa3a67a0e7efee3fa375b831317b7
WIP
 src/bit_set.zig           |  62
 src/bun.zig               |  21
 src/bundler/bundle_v2.zig | 275
 src/string_immutable.zig  |  13
 4 files changed, 304 insertions(+), 67 deletions(-)
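Note: the commit message is only "WIP". In substance, this change removes the linker's two-dimensional `Bitmap` (files x entry points) and instead stores an `AutoBitSet` of reachable entry points on each `LinkerGraph.File`, adds the first part of `LinkerContext.computeChunks` (which groups files into chunks keyed by those entry bits), and introduces the `Chunk` and `PathTemplate` types plus small helpers: `bun.hash32`, `StringArrayHashMapContext.Prehashed`, `strings.StringArrayByIndexSorter`, and a `bun.Joiner` re-export.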
diff --git a/src/bit_set.zig b/src/bit_set.zig
index f334ae826..f3ca18df7 100644
--- a/src/bit_set.zig
+++ b/src/bit_set.zig
@@ -781,10 +781,8 @@ pub const DynamicBitSetUnmanaged = struct {
         return (self.masks[maskIndex(index)] & maskBit(index)) != 0;
     }
 
-    pub fn byteRange(self: Self, start: usize, len: usize) []const u8 {
-        assert(start < self.bit_length);
-        const offset = maskIndex(start);
-        return std.mem.sliceAsBytes(self.masks[offset .. offset + len]);
+    pub fn bytes(self: Self) []const u8 {
+        return std.mem.sliceAsBytes(self.masks[0 .. numMasks(self.bit_length) + 1]);
     }
 
     /// Returns the total number of set bits in this bit set.
@@ -1056,6 +1054,62 @@ pub const DynamicBitSetUnmanaged = struct {
     }
 };
 
+pub const AutoBitSet = union(enum) {
+    const Static = ArrayBitSet(usize, (@bitSizeOf(DynamicBitSetUnmanaged) - 1));
+
+    static: Static,
+    dynamic: DynamicBitSetUnmanaged,
+
+    pub fn needsDynamic(bit_length: usize) bool {
+        return bit_length > Static.bit_length;
+    }
+
+    pub fn initEmpty(allocator: Allocator, bit_length: usize) !AutoBitSet {
+        if (bit_length <= Static.bit_length) {
+            return AutoBitSet{ .static = Static.initEmpty() };
+        } else {
+            return AutoBitSet{ .dynamic = try DynamicBitSetUnmanaged.initEmpty(allocator, bit_length) };
+        }
+    }
+
+    pub fn isSet(this: *const AutoBitSet, index: usize) bool {
+        return switch (std.meta.activeTag(this.*)) {
+            .static => this.static.isSet(index),
+            .dynamic => this.dynamic.isSet(index),
+        };
+    }
+
+    pub fn clone(this: *const AutoBitSet, allocator: std.mem.Allocator) !AutoBitSet {
+        return switch (std.meta.activeTag(this.*)) {
+            .static => AutoBitSet{ .static = this.static },
+            .dynamic => AutoBitSet{ .dynamic = try this.dynamic.clone(allocator) },
+        };
+    }
+
+    pub fn set(this: *AutoBitSet, index: usize) void {
+        switch (std.meta.activeTag(this.*)) {
+            .static => this.static.set(index),
+            .dynamic => this.dynamic.set(index),
+        }
+    }
+
+    pub fn bytes(this: *const AutoBitSet) []const u8 {
+        return switch (std.meta.activeTag(this.*)) {
+            .static => std.mem.asBytes(&this.static.masks),
+            .dynamic => this.dynamic.bytes(),
+        };
+    }
+
+    pub fn deinit(this: *AutoBitSet, allocator: std.mem.Allocator) void {
+        switch (std.meta.activeTag(this.*)) {
+            .static => {},
+            .dynamic => {
+                this.dynamic.deinit(allocator);
+            },
+        }
+    }
+};
+
 /// A bit set with runtime-known size, backed by an allocated slice
 /// of usize. Thin wrapper around DynamicBitSetUnmanaged which keeps
 /// track of the allocator instance.
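Reviewer note: `AutoBitSet` is a small-size optimization. When the bit length fits inside the footprint of a `DynamicBitSetUnmanaged` header, the bits are stored inline and no allocation happens; only larger universes take the heap-allocated arm. A hedged usage sketch; the `@import("bun")` module path is an assumption for illustration, not how the repo itself imports:

```zig
const std = @import("std");
const bun = @import("bun"); // assumed import path, for illustration only

fn example(allocator: std.mem.Allocator) !void {
    // 4 entry points fit in the static arm, so no allocation happens here.
    std.debug.assert(!bun.bit_set.AutoBitSet.needsDynamic(4));

    var bits = try bun.bit_set.AutoBitSet.initEmpty(allocator, 4);
    defer bits.deinit(allocator); // a no-op for the static arm

    bits.set(2);
    std.debug.assert(bits.isSet(2));
}
```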
diff --git a/src/bun.zig b/src/bun.zig
index a9528aa64..2845721c7 100644
--- a/src/bun.zig
+++ b/src/bun.zig
@@ -444,6 +444,11 @@ pub fn hash(content: []const u8) u64 {
     return std.hash.Wyhash.hash(0, content);
 }
 
+pub fn hash32(content: []const u8) u32 {
+    const res = hash(content);
+    return @truncate(u32, res);
+}
+
 pub const HiveArray = @import("./hive_array.zig").HiveArray;
 
 pub fn rand(bytes: []u8) void {
@@ -606,6 +611,20 @@ pub const StringArrayHashMapContext = struct {
     pub fn eql(_: @This(), a: []const u8, b: []const u8, _: usize) bool {
         return strings.eqlLong(a, b, true);
     }
+
+    pub const Prehashed = struct {
+        value: u32,
+        input: []const u8,
+        pub fn hash(this: @This(), s: []const u8) u32 {
+            if (s.ptr == this.input.ptr and s.len == this.input.len)
+                return this.value;
+            return @truncate(u32, std.hash.Wyhash.hash(0, s));
+        }
+
+        pub fn eql(_: @This(), a: []const u8, b: []const u8) bool {
+            return strings.eqlLong(a, b, true);
+        }
+    };
 };
 
 pub const StringHashMapContext = struct {
@@ -953,3 +972,5 @@ pub const fast_debug_build_mode = false and
     Environment.isDebug;
 
 pub const MultiArrayList = @import("./multi_array_list.zig").MultiArrayList;
+
+pub const Joiner = @import("./string_joiner.zig");
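Reviewer note: `hash32` and `Prehashed` let a caller hash a key once and reuse that hash across repeated adapted map lookups; the pointer-plus-length comparison identifies the single cached key without re-reading it. A hedged sketch of the pattern against `std.ArrayHashMap`'s adapted API (this standalone `Prehashed` uses the four-argument `eql` that adapted array-map contexts expect; the commit's variant omits the index parameter):

```zig
const std = @import("std");

// Illustrative adapted context: returns a precomputed hash for one hot key,
// hashes anything else normally.
const Prehashed = struct {
    value: u32,
    input: []const u8,

    pub fn hash(this: @This(), s: []const u8) u32 {
        if (s.ptr == this.input.ptr and s.len == this.input.len)
            return this.value;
        return @truncate(u32, std.hash.Wyhash.hash(0, s));
    }

    pub fn eql(_: @This(), a: []const u8, b: []const u8, _: usize) bool {
        return std.mem.eql(u8, a, b);
    }
};

fn countHits(map: *std.StringArrayHashMap(u32), key: []const u8, rounds: usize) usize {
    // Hash once; every adapted lookup below reuses ctx.value.
    const ctx = Prehashed{
        .value = @truncate(u32, std.hash.Wyhash.hash(0, key)),
        .input = key,
    };
    var hits: usize = 0;
    var i: usize = 0;
    while (i < rounds) : (i += 1) {
        if (map.getAdapted(key, ctx) != null) hits += 1;
    }
    return hits;
}
```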
diff --git a/src/bundler/bundle_v2.zig b/src/bundler/bundle_v2.zig
index 26c335d60..ebb63e589 100644
--- a/src/bundler/bundle_v2.zig
+++ b/src/bundler/bundle_v2.zig
@@ -69,6 +69,10 @@ const Batcher = bun.Batcher;
 const Symbol = js_ast.Symbol;
 const EventLoop = bun.JSC.AnyEventLoop;
 const MultiArrayList = bun.MultiArrayList;
+const Stmt = js_ast.Stmt;
+const Expr = js_ast.Expr;
+const Binding = js_ast.Binding;
+const AutoBitSet = bun.bit_set.AutoBitSet;
 
 pub const ThreadPool = struct {
     pool: ThreadPoolLib = undefined,
@@ -331,7 +335,6 @@ pub const BundleV2 = struct {
             .loop = event_loop,
             .graph = .{
                 .allocator = undefined,
-                .file_entry_bits = undefined,
             },
         },
     };
@@ -1246,55 +1249,6 @@ const EntryPoint = struct {
     };
 };
 
-/// Two-dimensional bitset
-/// Quickly lets us know which files are visible for which entry points
-const Bitmap = struct {
-    bitset: bun.bit_set.DynamicBitSetUnmanaged = undefined,
-    file_count: usize = 0,
-    entry_point_count: usize = 0,
-
-    pub fn init(file_count: usize, entry_point_count: usize, allocator: std.mem.Allocator) !Bitmap {
-        return Bitmap{
-            .file_count = file_count,
-            .entry_point_count = entry_point_count,
-            .bitset = try bun.bit_set.DynamicBitSetUnmanaged.initEmpty(allocator, file_count * entry_point_count * entry_point_count),
-        };
-    }
-
-    pub fn isSet(this: *Bitmap, file_id: usize, entry_point_id: usize) bool {
-        return this.bitset.isSet((this.file_count * file_id) + entry_point_id);
-    }
-
-    pub fn set(this: *Bitmap, file_id: usize, entry_point_id: usize) void {
-        this.bitset.set((this.file_count * file_id) + entry_point_id);
-    }
-
-    pub fn fileEntry(this: *Bitmap, file_id: usize) []const u8 {
-        return this.bitset.byteRange((this.file_count * file_id), this.entry_point_count);
-    }
-
-    pub fn setter(this: *Bitmap, file_id: usize) Setter {
-        return Setter{
-            .offset = file_id * this.file_count,
-            .bitset = this.bitset,
-        };
-    }
-
-    // turn an add and a multiply into an add
-    pub const Setter = struct {
-        offset: usize = 0,
-        bitset: bun.bit_set.DynamicBitSetUnmanaged,
-
-        pub fn isSet(this: Bitmap.Setter, x: usize) bool {
-            return this.bitset.isSet(this.offset + x);
-        }
-
-        pub fn set(this: Bitmap.Setter, x: usize) void {
-            this.bitset.set(this.offset + x);
-        }
-    };
-};
-
 const AstSourceIDMapping = struct {
     id: Index.Int,
     source_index: Index.Int,
@@ -1321,12 +1275,8 @@ const LinkerGraph = struct {
     stable_source_indices: []const u32 = &[_]u32{},
 
-    // This holds all entry points that can reach a file
-    // it is a 2 dimensional bitset
-    file_entry_bits: Bitmap,
-
-    pub fn init(allocator: std.mem.Allocator, file_count: usize, entry_point_count: usize) !LinkerGraph {
-        return LinkerGraph{ .allocator = allocator, .files_live = try bun.bit_set.DynamicBitSetUnmanaged.initEmpty(allocator, file_count), .file_entry_bits = try Bitmap.init(file_count, entry_point_count, allocator) };
+    pub fn init(allocator: std.mem.Allocator, file_count: usize) !LinkerGraph {
+        return LinkerGraph{ .allocator = allocator, .files_live = try bun.bit_set.DynamicBitSetUnmanaged.initEmpty(allocator, file_count) };
     }
 
     pub fn generateNewSymbol(this: *LinkerGraph, source_index: u32, kind: Symbol.Kind, original_name: string) Ref {
@@ -1465,8 +1415,6 @@ const LinkerGraph = struct {
     }
 
     pub fn load(this: *LinkerGraph, entry_points: []const Index, sources: []const Logger.Source) !void {
-        this.file_entry_bits = try Bitmap.init(sources.len, entry_points.len, this.allocator);
-
         try this.files.ensureTotalCapacity(this.allocator, sources.len);
         this.files.zero();
         this.files_live = try bun.bit_set.DynamicBitSetUnmanaged.initEmpty(
@@ -1525,6 +1473,8 @@ const LinkerGraph = struct {
     }
 
     pub const File = struct {
+        entry_bits: AutoBitSet = undefined,
+
        input_file: Index = Index.source(0),
 
        /// The minimum number of links in the module graph to get from an entry point
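Reviewer note: the per-file `entry_bits` live in the `files` MultiArrayList, so `graph.files.items(.entry_bits)` hands back one contiguous struct-of-arrays column indexed by file id, which is what the chunk computation below iterates. A minimal illustration of that access pattern (this `File` stand-in is simplified, not the linker's):

```zig
const std = @import("std");

// Illustrative: MultiArrayList stores each field as its own array, so one
// field can be sliced out as a column without touching the others.
const File = struct {
    entry_bits: u64, // stand-in for AutoBitSet
    distance: u32,
};

fn demo(allocator: std.mem.Allocator) !void {
    var files = std.MultiArrayList(File){};
    defer files.deinit(allocator);
    try files.append(allocator, .{ .entry_bits = 0, .distance = 0 });

    const bits: []u64 = files.items(.entry_bits); // one column, SoA layout
    bits[0] |= 1; // mark entry point 0 as reaching file 0
}
```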
@@ -1565,11 +1515,11 @@ const LinkerContext = struct {
     cycle_detector: std.ArrayList(ImportTracker) = undefined,
     swap_cycle_detector: std.ArrayList(ImportTracker) = undefined,
 
-    // We may need to refer to the "__esm" and/or "__commonJS" runtime symbols
+    /// We may need to refer to the "__esm" and/or "__commonJS" runtime symbols
     cjs_runtime_ref: Ref = Ref.None,
     esm_runtime_ref: Ref = Ref.None,
 
-    // We may need to refer to the CommonJS "module" symbol for exports
+    /// We may need to refer to the CommonJS "module" symbol for exports
     unbound_module_ref: Ref = Ref.None,
 
     options: LinkerOptions = LinkerOptions{},
@@ -1613,7 +1563,7 @@ const LinkerContext = struct {
         this.ambiguous_result_pool = std.ArrayList(MatchImport).init(this.allocator);
     }
 
-    pub fn link(this: *LinkerContext, bundle: *BundleV2, entry_points: []Index, reachable: []Index) !void {
+    pub noinline fn link(this: *LinkerContext, bundle: *BundleV2, entry_points: []Index, reachable: []Index) !void {
         try this.load(bundle, entry_points, reachable);
 
         try this.scanImportsAndExports();
@@ -1625,9 +1575,90 @@ const LinkerContext = struct {
 
         try this.treeShakingAndCodeSplitting();
 
+        const chunks = try this.computeChunks();
+
         this.graph.symbols.followAll();
     }
 
+    pub noinline fn computeChunks(this: *LinkerContext) ![]Chunk {
+        var stack_fallback = std.heap.stackFallback(4096, this.allocator);
+        var stack_all = stack_fallback.get();
+        var arena = std.heap.ArenaAllocator.init(stack_all);
+        defer arena.deinit();
+
+        var temp_allocator = arena.allocator();
+        var js_chunks = bun.StringArrayHashMap(Chunk).init(this.allocator);
+        try js_chunks.ensureUnusedCapacity(this.graph.entry_points.len);
+
+        var entry_source_indices = this.graph.entry_points.items(.source_index);
+
+        // Create chunks for entry points
+        for (entry_source_indices) |source_index, entry_id_| {
+            const entry_bit = @truncate(Chunk.EntryPoint.ID, entry_id_);
+
+            var entry_bits = try AutoBitSet.initEmpty(this.allocator, this.graph.entry_points.len);
+            entry_bits.set(entry_bit);
+
+            // Create a chunk for the entry point here to ensure that the chunk is
+            // always generated even if the resulting file is empty
+            var js_chunk_entry = try js_chunks.getOrPut(try temp_allocator.dupe(u8, entry_bits.bytes()));
+
+            js_chunk_entry.value_ptr.* = .{
+                .entry_point = .{
+                    .entry_point_id = entry_bit,
+                    .source_index = source_index,
+                    .is_entry_point = true,
+                },
+                .entry_bits = entry_bits,
+                .source_index = source_index.get(),
+                .content = .{
+                    .javascript = .{},
+                },
+            };
+        }
+
+        var file_entry_bits: []AutoBitSet = this.graph.files.items(.entry_bits);
+
+        // Figure out which JS files are in which chunk
+        for (this.graph.reachable_files) |source_index| {
+            if (this.graph.files_live.isSet(source_index.get())) {
+                var entry_bits: *AutoBitSet = &file_entry_bits[source_index.get()];
+                var js_chunk_entry = try js_chunks.getOrPut(
+                    try temp_allocator.dupe(u8, entry_bits.bytes()),
+                );
+
+                if (!js_chunk_entry.found_existing) {
+                    js_chunk_entry.value_ptr.* = .{
+                        .entry_bits = try entry_bits.clone(this.allocator),
+                        .source_index = source_index.get(),
+                        .content = .{
+                            .javascript = .{},
+                        },
+                    };
+                }
+
+                _ = try js_chunk_entry.value_ptr.files_with_parts_in_chunk.getOrPut(this.allocator, source_index.get());
+            }
+        }
+
+        js_chunks.sort(strings.StringArrayByIndexSorter.init(js_chunks.keys()));
+
+        var chunks: []Chunk = js_chunks.values();
+
+        var entry_point_chunk_indices: []u32 = this.graph.files.items(.entry_point_chunk_index);
+        // Map from the entry point file to this chunk. We will need this later if
+        // a file contains a dynamic import to this entry point, since we'll need
+        // to look up the path for this chunk to use with the import.
+        for (chunks) |*chunk, chunk_id| {
+            if (chunk.entry_point.is_entry_point) {
+                entry_point_chunk_indices[chunk.entry_point.source_index] = @truncate(u32, chunk_id);
+            }
+        }
+
+        // TODO: -- Rest of this function ---
+
+        return chunks;
+    }
+
     pub fn scanImportsAndExports(this: *LinkerContext) !void {
         const reachable = this.graph.reachable_files;
         const output_format = this.options.output_format;
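Reviewer note: `computeChunks` uses a stack-fallback arena so the short-lived map keys cost nothing in the common case, and its core idea is that a file's entry bits, viewed as raw bytes, are the identity of its chunk: files reached by exactly the same set of entry points share a chunk. A hedged sketch of that grouping step with simplified types (`groupByEntryBits` is illustrative, not part of the commit):

```zig
const std = @import("std");

// Illustrative: two files land in the same chunk exactly when the set of
// entry points that reach them is identical, so the raw bytes of the
// per-file bit set can key a string map of chunks.
fn groupByEntryBits(
    allocator: std.mem.Allocator,
    file_bits: []const []const u8, // file index -> entry_bits.bytes()
) !std.StringArrayHashMap(std.ArrayListUnmanaged(u32)) {
    var chunks = std.StringArrayHashMap(std.ArrayListUnmanaged(u32)).init(allocator);
    for (file_bits) |bits, file_id| {
        // Keys are borrowed here; the real code dupes them into an arena.
        const entry = try chunks.getOrPut(bits);
        if (!entry.found_existing) entry.value_ptr.* = .{};
        try entry.value_ptr.append(allocator, @intCast(u32, file_id));
    }
    return chunks;
}
```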
@@ -2758,6 +2789,12 @@ const LinkerContext = struct {
         // has to happen after tree shaking because there is an implicit dependency
         // between live parts within the same file. All liveness has to be computed
         // first before determining which entry points can reach which files.
+        var file_entry_bits: []AutoBitSet = c.graph.files.items(.entry_bits);
+        if (AutoBitSet.needsDynamic(entry_points.len)) {
+            for (file_entry_bits) |*bits| {
+                bits.* = try AutoBitSet.initEmpty(c.allocator, entry_points.len);
+            }
+        }
 
         for (entry_points) |entry_point, i| {
             c.markFileReachableForCodeSplitting(
@@ -2767,6 +2804,7 @@
                 0,
                 parts,
                 import_records,
+                file_entry_bits,
             );
         }
     }
@@ -2779,6 +2817,7 @@
         distance: u32,
         parts: []bun.BabyList(js_ast.Part),
         import_records: []bun.BabyList(bun.ImportRecord),
+        file_entry_bits: []AutoBitSet,
     ) void {
         if (!c.graph.files_live.isSet(source_index))
             return;
@@ -2791,10 +2830,10 @@
         const out_dist = distance + 1;
 
         // Don't mark this file more than once
-        if (c.graph.file_entry_bits.isSet(source_index, entry_points_count) and !traverse_again)
+        if (file_entry_bits[source_index].isSet(entry_points_count) and !traverse_again)
             return;
 
-        c.graph.file_entry_bits.set(source_index, entry_points_count);
+        file_entry_bits[source_index].set(entry_points_count);
 
         if (comptime bun.Environment.allow_assert)
             debug(
@@ -2816,6 +2855,7 @@
                 out_dist,
                 parts,
                 import_records,
+                file_entry_bits,
             );
         }
     }
@@ -2830,6 +2870,7 @@
                 out_dist,
                 parts,
                 import_records,
+                file_entry_bits,
             );
         }
     }
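Reviewer note: `markFileReachableForCodeSplitting` is, at its core, one depth-first traversal per entry point that stamps that entry point's bit onto every file it can reach; the bit doubles as the visited marker. A simplified sketch of that shape (the real code also tracks distances and may re-traverse a file when a shorter path is found):

```zig
const std = @import("std");

// Illustrative: walk the import graph from an entry point and set that
// entry point's bit on every reachable file.
fn markReachable(
    imports: []const []const u32, // file index -> imported file indices
    entry_bits: []std.bit_set.DynamicBitSetUnmanaged, // one bit set per file
    file: u32,
    entry_index: usize,
) void {
    if (entry_bits[file].isSet(entry_index)) return; // already visited
    entry_bits[file].set(entry_index);
    for (imports[file]) |target| {
        markReachable(imports, entry_bits, target, entry_index);
    }
}
```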
@@ -3787,3 +3828,111 @@ pub const ImportTracker = struct {
         tracker: *ImportTracker,
     };
 };
+
+pub const PathTemplate = struct {
+    data: string = "",
+    placeholder: Placeholder,
+
+    pub const Placeholder = struct {
+        dir: ?[]const u8 = null,
+        name: ?[]const u8 = null,
+        ext: ?[]const u8 = null,
+        hash: ?[]const u8 = null,
+    };
+};
+
+pub const Chunk = struct {
+    /// This is a random string and is used to represent the output path of this
+    /// chunk before the final output path has been computed.
+    unique_key: string = "",
+
+    files_with_parts_in_chunk: std.AutoArrayHashMapUnmanaged(Index.Int, void) = .{},
+    entry_bits: AutoBitSet = undefined,
+
+    final_rel_path: string = "",
+
+    /// For code splitting
+    cross_chunk_imports: []Import = &.{},
+
+    content: Content,
+
+    entry_point: Chunk.EntryPoint = .{},
+
+    is_executable: bool = false,
+
+    pub const IntermediateOutput = union(enum) {
+        /// If the chunk has references to other chunks, then "pieces" contains the
+        /// contents of the chunk. Another joiner
+        /// will have to be constructed later when merging the pieces together.
+        pieces: bun.BabyList(OutputPiece),
+
+        /// If the chunk doesn't have any references to other chunks, then
+        /// "joiner" contains the contents of the chunk. This is more efficient
+        /// because it avoids doing a join operation twice.
+        joiner: *bun.Joiner,
+    };
+
+    pub const OutputPiece = struct {
+        // laid out like this so it takes up the same amount of space as a []const u8
+        data_ptr: [*]const u8 = undefined,
+        data_len: u32 = 0,
+
+        index: OutputPieceIndex = .{},
+
+        pub inline fn data(this: OutputPiece) []const u8 {
+            return this.data_ptr[0..this.data_len];
+        }
+    };
+
+    pub const OutputPieceIndex = packed struct {
+        index: u30 = 0,
+
+        kind: Kind = Kind.none,
+
+        pub const Kind = enum(u2) {
+            /// The "kind" may be "none" in which case there is one piece
+            /// with data and no chunk index. For example, the chunk may not contain any
+            /// imports.
+            none,
+
+            asset,
+            chunk,
+        };
+    };
+
+    pub const EntryPoint = packed struct(u64) {
+        source_index: Index.Int = 0,
+        entry_point_id: ID = 0,
+        is_entry_point: bool = false,
+
+        // so it fits in a 64-bit integer
+        pub const ID = u31;
+    };
+
+    pub const Import = struct {
+        chunk_index: Index.Int = 0,
+        import_kind: ImportKind,
+    };
+
+    pub const CrossChunkImportItem = struct {
+        export_alias: string = "",
+        ref: Ref = Ref.None,
+
+        pub const List = []CrossChunkImportItem;
+    };
+
+    pub const JavaScriptChunk = struct {
+        files_in_chunk_order: []const Index.Int = &.{},
+        parts_in_chunk_in_order: []const PartRange = &.{},
+
+        // for code splitting
+        exports_to_other_chunks: std.ArrayHashMapUnmanaged(Ref, string, Ref.ArrayHashCtx, false) = .{},
+        imports_from_other_chunks: std.ArrayHashMapUnmanaged(Index.Int, CrossChunkImportItem.List, Index.ArrayHashCtx, false) = .{},
+        cross_chunk_prefix_stmts: BabyList(Stmt) = .{},
+        cross_chunk_suffix_stmts: BabyList(Stmt) = .{},
+    };
+
+    pub const Content = union(enum) {
+        javascript: JavaScriptChunk,
+    };
+};
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index caf09ae02..d24edb99a 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -3815,6 +3815,19 @@ pub fn sortDesc(in: []string) void {
     std.sort.sort([]const u8, in, {}, cmpStringsDesc);
 }
 
+pub const StringArrayByIndexSorter = struct {
+    keys: []const []const u8,
+    pub fn lessThan(sorter: *@This(), a: usize, b: usize) bool {
+        return strings.order(sorter.keys[a], sorter.keys[b]) == .lt;
+    }
+
+    pub fn init(keys: []const []const u8) @This() {
+        return .{
+            .keys = keys,
+        };
+    }
+};
+
 pub fn isASCIIHexDigit(c: u8) bool {
     return std.ascii.isHex(c);
 }
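Reviewer note: `StringArrayByIndexSorter` exists because `ArrayHashMap.sort` compares entries by index rather than by key, so the context needs the key column to resolve each index; `computeChunks` uses it above to make chunk ordering deterministic. A small usage sketch of the same pattern (`ByKey` and `sortByKeys` are illustrative names):

```zig
const std = @import("std");

// Illustrative: a by-index comparator over the map's own key column,
// mirroring js_chunks.sort(strings.StringArrayByIndexSorter.init(js_chunks.keys())).
const ByKey = struct {
    keys: []const []const u8,
    pub fn lessThan(ctx: @This(), a: usize, b: usize) bool {
        return std.mem.order(u8, ctx.keys[a], ctx.keys[b]) == .lt;
    }
};

fn sortByKeys(map: *std.StringArrayHashMap(u32)) void {
    // ArrayHashMap.sort reorders entries in place, calling
    // ctx.lessThan(a_index, b_index) to compare positions.
    map.sort(ByKey{ .keys = map.keys() });
}
```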