aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r-- src/string_immutable.zig 160
1 files changed, 120 insertions, 40 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index f4c6fae07..889add550 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -20,7 +20,7 @@ pub inline fn containsChar(self: string, char: u8) bool {
}
pub inline fn contains(self: string, str: string) bool {
- return std.mem.indexOf(u8, self, str) != null;
+ return indexOf(self, str) != null;
}
pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
@@ -41,11 +41,9 @@ pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize {
- for (self, 0..) |c, i| {
- inline for (str) |a| {
- if (c == a) {
- return @intCast(OptionalUsize, i);
- }
+ inline for (str) |a| {
+ if (indexOfChar(self, a)) |i| {
+ return @intCast(OptionalUsize, i);
}
}
@@ -148,6 +146,79 @@ pub fn indexOfCharNeg(self: string, char: u8) i32 {
return -1;
}
+/// Wrap `name` in a `FormatValidIdentifier` so it can be used as a format argument.
+/// Nothing is converted here: the ECMAScript-identifier rewriting is performed by
+/// `FormatValidIdentifier.format`, which writes its result to the supplied writer.
+/// The original note contrasts this with the string_mutable.zig version, which may
+/// avoid allocating.
+pub fn fmtIdentifier(name: string) FormatValidIdentifier {
+    return .{ .name = name };
+}
+
+/// Formatter that writes `name` as an ECMAScript identifier.
+///
+/// The first code point is checked with `js_lexer.isIdentifierStart`. Every later
+/// code point is kept only if it passes `js_lexer.isIdentifierContinue` and is a
+/// single byte wide; each run of rejected code points is replaced by one `_`.
+/// If the iterator yields no code point at all (presumably an empty name), a
+/// lone `_` is written.
+///
+/// This is a different implementation than the one in string_mutable.zig, which
+/// (per the original note) may avoid allocating. This version does not build an
+/// intermediate buffer itself; it always writes the result to `writer`.
+///
+/// NOTE(review): when the first code point is not an identifier start but is a
+/// one-byte identifier-continue character (presumably e.g. a digit), it is written
+/// unchanged, so the output can still begin with a non-start character. Confirm
+/// whether callers depend on that before changing it.
+pub const FormatValidIdentifier = struct {
+    name: string,
+    const js_lexer = @import("./js_lexer.zig");
+
+    /// `std.fmt`-style format hook; the format string and options are ignored.
+    /// Returns whatever error `writer.writeAll` produces.
+    pub fn format(self: FormatValidIdentifier, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+        var iterator = strings.CodepointIterator.init(self.name);
+        var cursor = strings.CodepointIterator.Cursor{};
+
+        // Nothing to iterate over: emit a placeholder identifier.
+        if (!iterator.next(&cursor)) {
+            try writer.writeAll("_");
+            return;
+        }
+
+        // Scan for the first code point that has to be replaced. `start_i` stays 0
+        // when the very first code point is already unacceptable as a start.
+        var needs_gap = !js_lexer.isIdentifierStart(cursor.c);
+        var start_i: usize = 0;
+        if (!needs_gap) {
+            while (iterator.next(&cursor)) {
+                if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) {
+                    needs_gap = true;
+                    start_i = cursor.i;
+                    break;
+                }
+            }
+        }
+
+        // Common case: every code point was acceptable, write the name through as-is.
+        if (!needs_gap) {
+            try writer.writeAll(self.name);
+            return;
+        }
+
+        // Everything before `start_i` was already validated by the scan above.
+        if (start_i > 0) try writer.writeAll(self.name[0..start_i]);
+
+        // Restart iteration on the remainder so `cursor.i` is relative to `slice`.
+        const slice = self.name[start_i..];
+        iterator = strings.CodepointIterator.init(slice);
+        cursor = strings.CodepointIterator.Cursor{};
+
+        // From here on `needs_gap` means "a run of rejected code points is pending".
+        needs_gap = false;
+        while (iterator.next(&cursor)) {
+            if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) {
+                if (needs_gap) {
+                    // Collapse the whole rejected run into a single underscore.
+                    try writer.writeAll("_");
+                    needs_gap = false;
+                }
+                try writer.writeAll(slice[cursor.i .. cursor.i + @as(u32, cursor.width)]);
+            } else {
+                // Skip the code point; it is replaced by one `_` later.
+                needs_gap = true;
+            }
+        }
+
+        // The name ended inside a rejected run (e.g. it ends with an emoji).
+        if (needs_gap) try writer.writeAll("_");
+    }
+};
+
pub fn indexOfSigned(self: string, str: string) i32 {
const i = std.mem.indexOf(u8, self, str) orelse return -1;
return @intCast(i32, i);
@@ -177,7 +248,9 @@ pub inline fn indexOf(self: string, str: string) ?usize {
const start = bun.C.memmem(self_ptr, self_len, str_ptr, str_len) orelse return null;
- return @ptrToInt(start) - @ptrToInt(self_ptr);
+ const i = @ptrToInt(start) - @ptrToInt(self_ptr);
+ std.debug.assert(i < self_len);
+ return @intCast(usize, i);
}
pub fn split(self: string, delimiter: string) SplitIterator {
@@ -2899,12 +2972,12 @@ pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127));
pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
-pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
-pub const AsciiVectorSmall = std.meta.Vector(8, u8);
-pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
-pub const AsciiVectorU1Small = std.meta.Vector(8, u1);
-pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
-pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
+pub const AsciiVector = @Vector(ascii_vector_size, u8);
+pub const AsciiVectorSmall = @Vector(8, u8);
+pub const AsciiVectorU1 = @Vector(ascii_vector_size, u1);
+pub const AsciiVectorU1Small = @Vector(8, u1);
+pub const AsciiVectorU16U1 = @Vector(ascii_u16_vector_size, u1);
+pub const AsciiU16Vector = @Vector(ascii_u16_vector_size, u16);
pub const max_4_ascii = @splat(4, @as(u8, 127));
pub fn isAllASCII(slice: []const u8) bool {
if (bun.FeatureFlags.use_simdutf)
@@ -3200,34 +3273,15 @@ pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 {
}
pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
- var remaining = slice;
- if (remaining.len == 0)
+ if (slice.len == 0)
return null;
- if (remaining[0] == char)
- return 0;
-
- if (comptime Environment.enableSIMD) {
- while (remaining.len >= ascii_vector_size) {
- const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- const cmp = vec == @splat(ascii_vector_size, char);
-
- if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) {
- const bitmask = @bitCast(AsciiVectorInt, cmp);
- const first = @ctz(bitmask);
- return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len));
- }
- remaining = remaining[ascii_vector_size..];
- }
- }
-
- for (remaining, 0..) |c, i| {
- if (c == char) {
- return @truncate(u32, i + (slice.len - remaining.len));
- }
- }
+ const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null;
+ const i = @ptrToInt(ptr) - @ptrToInt(slice.ptr);
+ std.debug.assert(i < slice.len);
+ std.debug.assert(slice[i] == char);
- return null;
+ return @truncate(u32, i);
}
test "indexOfChar" {
@@ -3829,25 +3883,51 @@ pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Alloca
return try std.mem.join(allocator, delimiter, slices);
}
+/// Byte-wise ordering of two strings.
+/// The shared prefix (the first `@min(a.len, b.len)` bytes) is compared with
+/// `bun.C.memcmp`; if that prefix is equal, the lengths decide the result, so a
+/// string orders before any longer string it is a prefix of.
+pub fn order(a: []const u8, b: []const u8) std.math.Order {
+    const shared_len = @min(a.len, b.len);
+    const prefix_cmp = bun.C.memcmp(a.ptr, b.ptr, shared_len);
+
+    if (prefix_cmp < 0) return .lt;
+    if (prefix_cmp > 0) return .gt;
+
+    // Equal prefixes: fall back to comparing lengths.
+    return std.math.order(a.len, b.len);
+}
+
pub fn cmpStringsAsc(_: void, a: string, b: string) bool {
- return std.mem.order(u8, a, b) == .lt;
+ return order(a, b) == .lt;
}
pub fn cmpStringsDesc(_: void, a: string, b: string) bool {
- return std.mem.order(u8, a, b) == .gt;
+ return order(a, b) == .gt;
}
const sort_asc = std.sort.asc(u8);
const sort_desc = std.sort.desc(u8);
/// Sorts `in` in place into ascending order, using `cmpStringsAsc` as the less-than predicate.
pub fn sortAsc(in: []string) void {
+ // TODO: experiment with simd to see if it's faster
std.sort.sort([]const u8, in, {}, cmpStringsAsc);
}
/// Sorts `in` in place into descending order, using `cmpStringsDesc` as the less-than predicate.
pub fn sortDesc(in: []string) void {
+ // TODO: experiment with simd to see if it's faster
std.sort.sort([]const u8, in, {}, cmpStringsDesc);
}
+/// Comparator context for sorting indices by the strings they point at:
+/// `a` and `b` are positions into `keys`, and the strings at those positions
+/// are compared with `strings.order`.
+pub const StringArrayByIndexSorter = struct {
+    keys: []const []const u8,
+
+    const Self = @This();
+
+    /// Builds a sorter over `keys`. The slice is stored as given, not copied.
+    pub fn init(keys: []const []const u8) Self {
+        return Self{ .keys = keys };
+    }
+
+    /// Returns true when `keys[a]` orders strictly before `keys[b]`.
+    pub fn lessThan(sorter: *const Self, a: usize, b: usize) bool {
+        const lhs = sorter.keys[a];
+        const rhs = sorter.keys[b];
+        return strings.order(lhs, rhs) == .lt;
+    }
+};
+
/// Reports whether `c` is an ASCII hexadecimal digit; thin wrapper over `std.ascii.isHex`.
pub fn isASCIIHexDigit(c: u8) bool {
return std.ascii.isHex(c);
}