diff options
Diffstat (limited to 'src/renamer.zig')
-rw-r--r-- | src/renamer.zig | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/src/renamer.zig b/src/renamer.zig index 777b3beb5..2775986ff 100644 --- a/src/renamer.zig +++ b/src/renamer.zig @@ -11,6 +11,7 @@ const default_allocator = bun.default_allocator; const C = bun.C; const std = @import("std"); const Ref = @import("./ast/base.zig").Ref; +const RefCtx = @import("./ast/base.zig").RefCtx; const logger = @import("root").bun.logger; const JSLexer = @import("./js_lexer.zig"); @@ -48,6 +49,7 @@ pub const NoOpRenamer = struct { pub const Renamer = union(enum) { NumberRenamer: *NumberRenamer, NoOpRenamer: *NoOpRenamer, + MinifyRenamer: *MinifyRenamer, pub fn symbols(this: Renamer) js_ast.Symbol.Map { return switch (this) { @@ -70,11 +72,394 @@ pub const Renamer = union(enum) { pub fn deinit(renamer: Renamer) void { switch (renamer) { .NumberRenamer => |r| r.deinit(), + .MinifyRenamer => |r| r.deinit(), else => {}, } } }; +pub const SymbolSlot = struct { + // Most minified names are under 15 bytes + // Instead of allocating a string for every symbol slot + // We can store the string inline! + // But we have to be very careful of where it's used. + // Or we WILL run into memory bugs. + name: TinyString = TinyString{ .string = "" }, + count: u32 = 0, + needs_capital_for_jsx: bool = false, + + pub const List = std.EnumArray(js_ast.Symbol.SlotNamespace, std.ArrayList(SymbolSlot)); + + pub const InlineString = struct { + bytes: [15]u8 = [_]u8{0} ** 15, + len: u8 = 0, + + pub fn init(str: []const u8) InlineString { + var this: InlineString = .{}; + this.len = @intCast(u8, @min(str.len, 15)); + for (this.bytes[0..this.len], str[0..this.len]) |*b, c| { + b.* = c; + } + return this; + } + + // do not make this *const or you will run into memory bugs. + // we cannot let the compiler decide to copy this struct because + // that would cause this to become a pointer to stack memory. + pub fn slice(this: *InlineString) string { + return this.bytes[0..this.len]; + } + }; + + pub const TinyString = union(enum) { + inline_string: InlineString, + string: string, + + pub fn init(input: string, allocator: std.mem.Allocator) !TinyString { + if (input.len <= 15) { + return TinyString{ .inline_string = InlineString.init(input) }; + } else { + return TinyString{ .string = try allocator.dupe(u8, input) }; + } + } + + // do not make this *const or you will run into memory bugs. + // we cannot let the compiler decide to copy this struct because + // that would cause this to become a pointer to stack memory. + pub fn slice(this: *TinyString) string { + return switch (this.*) { + .inline_string => this.inline_string.slice(), + .string => this.string, + }; + } + }; +}; + +pub const MinifyRenamer = struct { + reserved_names: bun.StringHashMapUnmanaged(u32), + allocator: std.mem.Allocator, + slots: SymbolSlot.List = undefined, + top_level_symbol_to_slot: TopLevelSymbolSlotMap, + symbols: js_ast.Symbol.Map, + + pub const TopLevelSymbolSlotMap = std.HashMapUnmanaged(Ref, usize, RefCtx, 80); + + pub fn init( + allocator: std.mem.Allocator, + symbols: js_ast.Symbol.Map, + first_top_level_slots: js_ast.SlotCounts, + reserved_names: bun.StringHashMapUnmanaged(u32), + ) !*MinifyRenamer { + var renamer = try allocator.create(MinifyRenamer); + var slots = SymbolSlot.List.initUndefined(); + + for (first_top_level_slots.slots.values, 0..) |count, ns| { + slots.values[ns] = try std.ArrayList(SymbolSlot).initCapacity(allocator, count); + slots.values[ns].items.len = count; + std.mem.set(SymbolSlot, slots.values[ns].items[0..count], SymbolSlot{}); + } + + renamer.* = MinifyRenamer{ + .symbols = symbols, + .reserved_names = reserved_names, + .slots = slots, + .top_level_symbol_to_slot = TopLevelSymbolSlotMap{}, + .allocator = allocator, + }; + + return renamer; + } + + pub fn deinit(this: *MinifyRenamer) void { + _ = this; + } + + pub fn toRenamer(this: *MinifyRenamer) Renamer { + return .{ + .MinifyRenamer = this, + }; + } + + pub fn nameForSymbol(this: *MinifyRenamer, _ref: Ref) string { + const ref = this.symbols.follow(_ref); + const symbol = this.symbols.get(ref).?; + + const ns = symbol.slotNamespace(); + if (ns == .must_not_be_renamed) { + return symbol.original_name; + } + + const i = symbol.nestedScopeSlot() orelse + this.top_level_symbol_to_slot.get(ref) orelse + return symbol.original_name; + + // This has to be a pointer because the string might be stored inline + return this.slots.getPtr(ns).items[i].name.slice(); + } + + pub fn originalName(this: *MinifyRenamer, ref: Ref) ?string { + _ = ref; + _ = this; + return null; + } + + pub fn accumulateSymbolUseCounts( + this: *MinifyRenamer, + top_level_symbols: *StableSymbolCount.Array, + symbol_uses: js_ast.Part.SymbolUseMap, + stable_source_indices: []const u32, + ) !void { + // NOTE: This function is run in parallel. Make sure to avoid data races. + var iter = symbol_uses.iterator(); + while (iter.next()) |value| { + try this.accumulateSymbolUseCount(top_level_symbols, value.key_ptr.*, value.value_ptr.*.count_estimate, stable_source_indices); + } + } + + pub fn accumulateSymbolUseCount( + this: *MinifyRenamer, + top_level_symbols: *StableSymbolCount.Array, + _ref: Ref, + count: u32, + stable_source_indices: []const u32, + ) !void { + var ref = this.symbols.follow(_ref); + var symbol = this.symbols.get(ref).?; + + while (symbol.namespace_alias != null) { + const ref_ = this.symbols.follow(symbol.namespace_alias.?.namespace_ref); + if (ref_.eql(ref)) break; + ref = ref_; + symbol = this.symbols.get(ref_).?; + } + + const ns = symbol.slotNamespace(); + if (ns == .must_not_be_renamed) return; + + if (symbol.nestedScopeSlot()) |i| { + var slot = &this.slots.getPtr(ns).items[i]; + slot.count += count; + if (symbol.must_start_with_capital_letter_for_jsx) { + slot.needs_capital_for_jsx = true; + } + return; + } + + try top_level_symbols.append(StableSymbolCount{ + .stable_source_index = stable_source_indices[ref.sourceIndex()], + .ref = ref, + .count = count, + }); + } + + pub fn allocateTopLevelSymbolSlots(this: *MinifyRenamer, top_level_symbols: StableSymbolCount.Array) !void { + for (top_level_symbols.items) |stable| { + const symbol = this.symbols.get(stable.ref).?; + var slots = this.slots.getPtr(symbol.slotNamespace()); + + var existing = try this.top_level_symbol_to_slot.getOrPut(this.allocator, stable.ref); + if (existing.found_existing) { + var slot = &slots.items[existing.value_ptr.*]; + slot.count += stable.count; + if (symbol.must_start_with_capital_letter_for_jsx) { + slot.needs_capital_for_jsx = true; + } + } else { + existing.value_ptr.* = slots.items.len; + try slots.append(SymbolSlot{ + .count = stable.count, + .needs_capital_for_jsx = symbol.must_start_with_capital_letter_for_jsx, + }); + } + } + } + + pub fn assignNamesByFrequency(this: *MinifyRenamer, name_minifier: *js_ast.NameMinifier) !void { + var name_buf = try std.ArrayList(u8).initCapacity(this.allocator, 64); + defer name_buf.deinit(); + + var sorted = SlotAndCount.Array.init(this.allocator); + defer sorted.deinit(); + + inline for (comptime std.enums.values(js_ast.Symbol.SlotNamespace)) |ns| { + var slots = this.slots.getPtr(ns); + sorted.clearRetainingCapacity(); + try sorted.ensureUnusedCapacity(slots.items.len); + sorted.items.len = slots.items.len; + + for (sorted.items, 0..) |*slot, i| { + slot.* = SlotAndCount{ + .slot = @intCast(u32, i), + .count = slot.count, + }; + } + std.sort.sort(SlotAndCount, sorted.items, {}, SlotAndCount.lessThan); + + var next_name: isize = 0; + + for (sorted.items) |data| { + var slot = &slots.items[data.slot]; + + try name_minifier.numberToMinifiedName(&name_buf, next_name); + next_name += 1; + + // Make sure we never generate a reserved name. We only have to worry + // about collisions with reserved identifiers for normal symbols, and we + // only have to worry about collisions with keywords for labels. We do + // not have to worry about either for private names because they start + // with a "#" character. + switch (comptime ns) { + .default => { + while (this.reserved_names.contains(name_buf.items)) { + try name_minifier.numberToMinifiedName(&name_buf, next_name); + next_name += 1; + } + + if (slot.needs_capital_for_jsx) { + while (name_buf.items[0] >= 'a' and name_buf.items[0] <= 'z') { + try name_minifier.numberToMinifiedName(&name_buf, next_name); + next_name += 1; + } + } + }, + .label => { + while (JSLexer.Keywords.get(name_buf.items)) |_| { + try name_minifier.numberToMinifiedName(&name_buf, next_name); + next_name += 1; + } + }, + .private_name => { + try name_buf.insert(0, '#'); + }, + else => {}, + } + + slot.name = SymbolSlot.TinyString.init(name_buf.items, this.allocator) catch unreachable; + } + } + } +}; + +pub fn assignNestedScopeSlots(allocator: std.mem.Allocator, module_scope: *js_ast.Scope, symbols: []js_ast.Symbol) js_ast.SlotCounts { + var slot_counts = js_ast.SlotCounts{}; + var sorted_members = std.ArrayList(u32).init(allocator); + defer sorted_members.deinit(); + + // Temporarily set the nested scope slots of top-level symbols to valid so + // they aren't renamed in nested scopes. This prevents us from accidentally + // assigning nested scope slots to variables declared using "var" in a nested + // scope that are actually hoisted up to the module scope to become a top- + // level symbol. + const valid_slot: u32 = 0; + var members = module_scope.members.valueIterator(); + while (members.next()) |member| { + symbols[member.ref.innerIndex()].nested_scope_slot = valid_slot; + } + for (module_scope.generated.slice()) |ref| { + symbols[ref.innerIndex()].nested_scope_slot = valid_slot; + } + + for (module_scope.children.slice()) |child| { + slot_counts.unionMax(assignNestedScopeSlotsHelper(&sorted_members, child, symbols, js_ast.SlotCounts{})); + } + + // Then set the nested scope slots of top-level symbols back to zero. Top- + // level symbols are not supposed to have nested scope slots. + members = module_scope.members.valueIterator(); + + while (members.next()) |member| { + symbols[member.ref.innerIndex()].nested_scope_slot = js_ast.Symbol.invalid_nested_scope_slot; + } + for (module_scope.generated.slice()) |ref| { + symbols[ref.innerIndex()].nested_scope_slot = js_ast.Symbol.invalid_nested_scope_slot; + } + + return slot_counts; +} + +pub fn assignNestedScopeSlotsHelper(sorted_members: *std.ArrayList(u32), scope: *js_ast.Scope, symbols: []js_ast.Symbol, slot_to_copy: js_ast.SlotCounts) js_ast.SlotCounts { + var slot = slot_to_copy; + // Sort member map keys for determinism + { + sorted_members.clearRetainingCapacity(); + sorted_members.ensureUnusedCapacity(scope.members.count()) catch unreachable; + sorted_members.items.len = scope.members.count(); + var sorted_members_buf = sorted_members.items; + var members = scope.members.valueIterator(); + var i: usize = 0; + while (members.next()) |member| { + sorted_members_buf[i] = member.ref.innerIndex(); + i += 1; + } + std.sort.sort(u32, sorted_members_buf, {}, std.sort.asc(u32)); + + // Assign slots for this scope's symbols. Only do this if the slot is + // not already assigned. Nested scopes have copies of symbols from parent + // scopes and we want to use the slot from the parent scope, not child scopes. + for (sorted_members_buf) |inner_index| { + var symbol = &symbols[inner_index]; + const ns = symbol.slotNamespace(); + if (ns != .must_not_be_renamed and symbol.nestedScopeSlot() == null) { + symbol.nested_scope_slot = slot.slots.get(ns); + slot.slots.getPtr(ns).* += 1; + } + } + } + + for (scope.generated.slice()) |ref| { + var symbol = &symbols[ref.innerIndex()]; + const ns = symbol.slotNamespace(); + if (ns != .must_not_be_renamed and symbol.nestedScopeSlot() == null) { + symbol.nested_scope_slot = slot.slots.get(ns); + slot.slots.getPtr(ns).* += 1; + } + } + + // Labels are always declared in a nested scope, so we don't need to check. + if (scope.label_ref) |ref| { + var symbol = &symbols[ref.innerIndex()]; + const ns = js_ast.Symbol.SlotNamespace.label; + symbol.nested_scope_slot = slot.slots.get(ns); + slot.slots.getPtr(ns).* += 1; + } + + // Assign slots for the symbols of child scopes + var slot_counts = slot; + for (scope.children.slice()) |child| { + slot_counts.unionMax(assignNestedScopeSlotsHelper(sorted_members, child, symbols, slot)); + } + + return slot_counts; +} + +pub const StableSymbolCount = struct { + stable_source_index: u32, + ref: Ref, + count: u32, + + pub const Array = std.ArrayList(StableSymbolCount); + + pub fn lessThan(_: void, i: StableSymbolCount, j: StableSymbolCount) bool { + if (i.count > j.count) return true; + if (i.count < j.count) return false; + if (i.stable_source_index < j.stable_source_index) return true; + if (i.stable_source_index > j.stable_source_index) return false; + + return i.ref.innerIndex() < j.ref.innerIndex(); + } +}; + +const SlotAndCount = packed struct { + slot: u32, + count: u32, + + pub const Array = std.ArrayList(SlotAndCount); + + pub fn lessThan(_: void, a: SlotAndCount, b: SlotAndCount) bool { + return a.count > b.count or (a.count == b.count and a.slot < b.slot); + } +}; + pub const NumberRenamer = struct { symbols: js_ast.Symbol.Map, names: []bun.BabyList(string) = &.{}, @@ -429,6 +814,7 @@ pub const NumberRenamer = struct { pub const ExportRenamer = struct { string_buffer: bun.MutableString, used: bun.StringHashMap(u32), + count: isize = 0, pub fn init(allocator: std.mem.Allocator) ExportRenamer { return ExportRenamer{ @@ -474,6 +860,12 @@ pub const ExportRenamer = struct { return entry.key_ptr.*; } + + pub fn nextMinifiedName(this: *ExportRenamer, allocator: std.mem.Allocator) !string { + const name = try js_ast.NameMinifier.defaultNumberToMinifiedName(allocator, this.count); + this.count += 1; + return name; + } }; pub fn computeInitialReservedNames( |