1 files changed, 120 insertions, 40 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index f4c6fae07..889add550 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -20,7 +20,7 @@ pub inline fn containsChar(self: string, char: u8) bool {
 }
 
 pub inline fn contains(self: string, str: string) bool {
-    return std.mem.indexOf(u8, self, str) != null;
+    return indexOf(self, str) != null; // this file's memmem-backed indexOf, not std.mem.indexOf
 }
 
 pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
@@ -41,11 +41,9 @@ pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
 
 const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
 pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize {
-    for (self, 0..) |c, i| {
-        inline for (str) |a| {
-            if (c == a) {
-                return @intCast(OptionalUsize, i);
-            }
+    inline for (str) |a| {
+        if (indexOfChar(self, a)) |i| {
+            return @intCast(OptionalUsize, i);
         }
     }
 
@@ -148,6 +146,79 @@ pub fn indexOfCharNeg(self: string, char: u8) i32 {
     return -1;
 }
 
+/// Wrap `name` in a `FormatValidIdentifier`, which formats it as an ECMAScript identifier.
+/// Unlike the string_mutable.zig version, the formatted output is always a copy.
+pub fn fmtIdentifier(name: string) FormatValidIdentifier {
+    return FormatValidIdentifier{ .name = name };
+}
+
+/// Format a string to an ECMAScript identifier.
+/// Different implementation than string_mutable because string_mutable may avoid allocating.
+/// This always copies: every path writes its result to `writer`; nothing is allocated here.
+pub const FormatValidIdentifier = struct {
+    name: string,
+    const js_lexer = @import("./js_lexer.zig");
+    pub fn format(self: FormatValidIdentifier, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+        var iterator = strings.CodepointIterator.init(self.name);
+        var cursor = strings.CodepointIterator.Cursor{};
+
+        // needs_gap: code points were skipped and a single "_" is still owed for them.
+        var needs_gap = false;
+        var start_i: usize = 0;
+
+        if (!iterator.next(&cursor)) {
+            try writer.writeAll("_");
+            return;
+        }
+
+        // Common case: no gap necessary, the name is written through unchanged.
+        needs_gap = !js_lexer.isIdentifierStart(cursor.c);
+        if (!needs_gap) {
+            // Find the first code point (if any) that cannot be kept as-is.
+            while (iterator.next(&cursor)) {
+                if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) {
+                    needs_gap = true;
+                    start_i = cursor.i;
+                    break;
+                }
+            }
+        }
+
+        if (needs_gap) {
+            // start_i == 0 only when the very first code point is not an identifier start.
+            // Owe a "_" up front in that case so a leading digit yields "_1", not "1".
+            needs_gap = start_i == 0;
+            if (start_i > 0) try writer.writeAll(self.name[0..start_i]);
+            const slice = self.name[start_i..];
+            iterator = strings.CodepointIterator.init(slice);
+            cursor = strings.CodepointIterator.Cursor{};
+
+            while (iterator.next(&cursor)) {
+                if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) {
+                    if (needs_gap) {
+                        try writer.writeAll("_");
+                        needs_gap = false;
+                    }
+                    try writer.writeAll(slice[cursor.i .. cursor.i + @as(u32, cursor.width)]);
+                } else if (!needs_gap) {
+                    needs_gap = true;
+                    // skip the code point, replace it with a single _
+                }
+            }
+
+            // Trailing skipped code points (e.g. the name ends with an emoji) still owe a "_".
+            if (needs_gap) {
+                try writer.writeAll("_");
+                needs_gap = false;
+            }
+
+            return;
+        }
+
+        try writer.writeAll(self.name);
+    }
+};
+
 pub fn indexOfSigned(self: string, str: string) i32 {
     const i = std.mem.indexOf(u8, self, str) orelse return -1;
     return @intCast(i32, i);
@@ -177,7 +248,9 @@ pub inline fn indexOf(self: string, str: string) ?usize {
 
     const start = bun.C.memmem(self_ptr, self_len, str_ptr, str_len) orelse return null;
 
-    return @ptrToInt(start) - @ptrToInt(self_ptr);
+    const i = @ptrToInt(start) - @ptrToInt(self_ptr);
+    std.debug.assert(i < self_len);
+    return @intCast(usize, i);
 }
 
 pub fn split(self: string, delimiter: string) SplitIterator {
@@ -2899,12 +2972,12 @@ pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127));
 pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
 pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
 pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
-pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
-pub const AsciiVectorSmall = std.meta.Vector(8, u8);
-pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
-pub const AsciiVectorU1Small = std.meta.Vector(8, u1);
-pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
-pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
+pub const AsciiVector = @Vector(ascii_vector_size, u8); // ascii_vector_size lanes of u8
+pub const AsciiVectorSmall = @Vector(8, u8); // 8 lanes of u8
+pub const AsciiVectorU1 = @Vector(ascii_vector_size, u1); // one bit per AsciiVector lane
+pub const AsciiVectorU1Small = @Vector(8, u1); // one bit per AsciiVectorSmall lane
+pub const AsciiVectorU16U1 = @Vector(ascii_u16_vector_size, u1); // one bit per AsciiU16Vector lane
+pub const AsciiU16Vector = @Vector(ascii_u16_vector_size, u16); // ascii_u16_vector_size lanes of u16
 pub const max_4_ascii = @splat(4, @as(u8, 127));
 pub fn isAllASCII(slice: []const u8) bool {
     if (bun.FeatureFlags.use_simdutf)
@@ -3200,34 +3273,15 @@ pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 {
 }
 
 pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
-    var remaining = slice;
-    if (remaining.len == 0)
+    if (slice.len == 0) // empty slice: nothing to search, skip the memchr call
         return null;
 
-    if (remaining[0] == char)
-        return 0;
-
-    if (comptime Environment.enableSIMD) {
-        while (remaining.len >= ascii_vector_size) {
-            const vec: AsciiVector = remaining[0..ascii_vector_size].*;
-            const cmp = vec == @splat(ascii_vector_size, char);
-
-            if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) {
-                const bitmask = @bitCast(AsciiVectorInt, cmp);
-                const first = @ctz(bitmask);
-                return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len));
-            }
-            remaining = remaining[ascii_vector_size..];
-        }
-    }
-
-    for (remaining, 0..) |c, i| {
-        if (c == char) {
-            return @truncate(u32, i + (slice.len - remaining.len));
-        }
-    }
+    const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null; // null: char not present
+    const i = @ptrToInt(ptr) - @ptrToInt(slice.ptr); // byte offset of the match within slice
+    std.debug.assert(i < slice.len);
+    std.debug.assert(slice[i] == char);
 
-    return null;
+    return @truncate(u32, i); // NOTE(review): an offset that does not fit in u32 is truncated silently
 }
 
 test "indexOfChar" {
@@ -3829,25 +3883,51 @@ pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Alloca
     return try std.mem.join(allocator, delimiter, slices);
 }
 
+pub fn order(a: []const u8, b: []const u8) std.math.Order {
+    const len = @min(a.len, b.len); // only the common prefix is compared bytewise
+    const cmp = bun.C.memcmp(a.ptr, b.ptr, len);
+    return switch (std.math.sign(cmp)) {
+        0 => std.math.order(a.len, b.len), // equal prefix: the shorter slice orders first
+        1 => .gt,
+        -1 => .lt,
+        else => unreachable, // sign() only yields -1, 0 or 1
+    };
+}
+
 pub fn cmpStringsAsc(_: void, a: string, b: string) bool {
-    return std.mem.order(u8, a, b) == .lt;
+    return order(a, b) == .lt; // true only when a orders strictly before b
 }
 
 pub fn cmpStringsDesc(_: void, a: string, b: string) bool {
-    return std.mem.order(u8, a, b) == .gt;
+    return order(a, b) == .gt; // true only when a orders strictly after b
 }
 
 const sort_asc = std.sort.asc(u8);
 const sort_desc = std.sort.desc(u8);
 
 pub fn sortAsc(in: []string) void {
+    // Sorts `in` in place, ascending per cmpStringsAsc. TODO: experiment with simd to see if it's faster
     std.sort.sort([]const u8, in, {}, cmpStringsAsc);
 }
 
 pub fn sortDesc(in: []string) void {
+    // Sorts `in` in place, descending per cmpStringsDesc. TODO: experiment with simd to see if it's faster
     std.sort.sort([]const u8, in, {}, cmpStringsDesc);
 }
 
+pub const StringArrayByIndexSorter = struct { // compares indices by the strings they select in `keys`
+    keys: []const []const u8,
+    pub fn lessThan(sorter: *const @This(), a: usize, b: usize) bool { // a and b are indices into keys
+        return strings.order(sorter.keys[a], sorter.keys[b]) == .lt;
+    }
+
+    pub fn init(keys: []const []const u8) @This() { // stores the slice as-is; the strings are not copied
+        return .{
+            .keys = keys,
+        };
+    }
+};
+
 pub fn isASCIIHexDigit(c: u8) bool {
     return std.ascii.isHex(c);
 }