diff options
Diffstat (limited to '')
-rw-r--r-- | src/comptime_string_map.zig | 386 |
1 files changed, 386 insertions, 0 deletions
diff --git a/src/comptime_string_map.zig b/src/comptime_string_map.zig new file mode 100644 index 000000000..f6ce6db8c --- /dev/null +++ b/src/comptime_string_map.zig @@ -0,0 +1,386 @@ +const std = @import("std"); +const mem = std.mem; +const strings = @import("./string_immutable.zig"); + +/// Comptime string map optimized for small sets of disparate string keys. +/// Works by separating the keys by length at comptime and only checking strings of +/// equal length at runtime. +/// +/// `kvs` expects a list literal containing list literals or an array/slice of structs +/// where `.@"0"` is the `[]const u8` key and `.@"1"` is the associated value of type `V`. +/// TODO: https://github.com/ziglang/zig/issues/4335 +pub fn ComptimeStringMapWithKeyType(comptime KeyType: type, comptime V: type, comptime kvs_list: anytype) type { + const KV = struct { + key: []const KeyType, + value: V, + }; + + const precomputed = comptime blk: { + @setEvalBranchQuota(2000); + + var sorted_kvs: [kvs_list.len]KV = undefined; + const lenAsc = (struct { + fn lenAsc(context: void, a: KV, b: KV) bool { + _ = context; + if (a.key.len != b.key.len) { + return a.key.len < b.key.len; + } + // https://stackoverflow.com/questions/11227809/why-is-processing-a-sorted-array-faster-than-processing-an-unsorted-array + @setEvalBranchQuota(4000); + return std.mem.order(KeyType, a.key, b.key) == .lt; + } + }).lenAsc; + if (KeyType == u8) { + for (kvs_list) |kv, i| { + if (V != void) { + sorted_kvs[i] = .{ .key = kv.@"0", .value = kv.@"1" }; + } else { + sorted_kvs[i] = .{ .key = kv.@"0", .value = {} }; + } + } + } else { + @compileError("Not implemented for this key type"); + } + std.sort.sort(KV, &sorted_kvs, {}, lenAsc); + const min_len = sorted_kvs[0].key.len; + const max_len = sorted_kvs[sorted_kvs.len - 1].key.len; + var len_indexes: [max_len + 1]usize = undefined; + var len: usize = 0; + var i: usize = 0; + while (len <= max_len) : (len += 1) { + // find the first keyword len == len + while (len > sorted_kvs[i].key.len) { + i += 1; + } + len_indexes[len] = i; + } + break :blk .{ + .min_len = min_len, + .max_len = max_len, + .sorted_kvs = sorted_kvs, + .len_indexes = len_indexes, + }; + }; + + return struct { + const len_indexes = precomputed.len_indexes; + pub const kvs = precomputed.sorted_kvs; + + pub fn has(str: []const KeyType) bool { + return get(str) != null; + } + + pub fn getWithLength(str: []const KeyType, comptime len: usize) ?V { + const end = comptime brk: { + var i = len_indexes[len]; + while (i < kvs.len and kvs[i].key.len == len) : (i += 1) {} + break :brk i; + }; + + comptime var i = len_indexes[len]; + + inline while (i < end) : (i += 1) { + if (strings.eqlComptimeCheckLenWithType(KeyType, str, kvs[i].key, false)) { + return kvs[i].value; + } + } + + return null; + } + + pub fn get(str: []const KeyType) ?V { + if (str.len < precomputed.min_len or str.len > precomputed.max_len) + return null; + + comptime var i: usize = precomputed.min_len; + inline while (i <= precomputed.max_len) : (i += 1) { + if (str.len == i) { + return getWithLength(str, i); + } + } + + return null; + } + }; +} + +pub fn ComptimeStringMap(comptime V: type, comptime kvs_list: anytype) type { + return ComptimeStringMapWithKeyType(u8, V, kvs_list); +} + +pub fn ComptimeStringMap16(comptime V: type, comptime kvs_list: anytype) type { + return ComptimeStringMapWithKeyType(u16, V, kvs_list); +} + +const TestEnum = enum { + A, + B, + C, + D, + E, +}; + +test "ComptimeStringMap list literal of list literals" { + const map = ComptimeStringMap(TestEnum, .{ + .{ "these", .D }, + .{ "have", .A }, + .{ "nothing", .B }, + .{ "incommon", .C }, + .{ "samelen", .E }, + }); + + try testMap(map); +} + +test "ComptimeStringMap array of structs" { + const KV = struct { + @"0": []const u8, + @"1": TestEnum, + }; + const map = ComptimeStringMap(TestEnum, [_]KV{ + .{ .@"0" = "these", .@"1" = .D }, + .{ .@"0" = "have", .@"1" = .A }, + .{ .@"0" = "nothing", .@"1" = .B }, + .{ .@"0" = "incommon", .@"1" = .C }, + .{ .@"0" = "samelen", .@"1" = .E }, + }); + + try testMap(map); +} + +test "ComptimeStringMap slice of structs" { + const KV = struct { + @"0": []const u8, + @"1": TestEnum, + }; + const slice: []const KV = &[_]KV{ + .{ .@"0" = "these", .@"1" = .D }, + .{ .@"0" = "have", .@"1" = .A }, + .{ .@"0" = "nothing", .@"1" = .B }, + .{ .@"0" = "incommon", .@"1" = .C }, + .{ .@"0" = "samelen", .@"1" = .E }, + }; + const map = ComptimeStringMap(TestEnum, slice); + + try testMap(map); +} + +fn testMap(comptime map: anytype) !void { + try std.testing.expectEqual(TestEnum.A, map.get("have").?); + try std.testing.expectEqual(TestEnum.B, map.get("nothing").?); + try std.testing.expect(null == map.get("missing")); + try std.testing.expectEqual(TestEnum.D, map.get("these").?); + try std.testing.expectEqual(TestEnum.E, map.get("samelen").?); + + try std.testing.expect(!map.has("missing")); + try std.testing.expect(map.has("these")); +} + +test "ComptimeStringMap void value type, slice of structs" { + const KV = struct { + @"0": []const u8, + }; + const slice: []const KV = &[_]KV{ + .{ .@"0" = "these" }, + .{ .@"0" = "have" }, + .{ .@"0" = "nothing" }, + .{ .@"0" = "incommon" }, + .{ .@"0" = "samelen" }, + }; + const map = ComptimeStringMap(void, slice); + + try testSet(map); +} + +test "ComptimeStringMap void value type, list literal of list literals" { + const map = ComptimeStringMap(void, .{ + .{"these"}, + .{"have"}, + .{"nothing"}, + .{"incommon"}, + .{"samelen"}, + }); + + try testSet(map); +} + +fn testSet(comptime map: anytype) !void { + try std.testing.expectEqual({}, map.get("have").?); + try std.testing.expectEqual({}, map.get("nothing").?); + try std.testing.expect(null == map.get("missing")); + try std.testing.expectEqual({}, map.get("these").?); + try std.testing.expectEqual({}, map.get("samelen").?); + + try std.testing.expect(!map.has("missing")); + try std.testing.expect(map.has("these")); +} + +const TestEnum2 = enum { + A, + B, + C, + D, + E, + F, + G, + H, + I, + J, + K, + L, + M, + N, + O, + P, + Q, + R, + S, + T, + U, + V, + W, + X, + Y, + FZ, + FA, + FB, + FC, + FD, + FE, + FF, + FG, + FH, + FI, + FJ, + FK, + FL, + + pub const map = ComptimeStringMap(TestEnum2, .{ + .{ "these", .A }, + .{ "have", .B }, + .{ "nothing", .C }, + .{ "nothinz", .D }, + .{ "nothinc", .E }, + .{ "nothina", .F }, + .{ "nothinb", .G }, + .{ "nothiaa", .H }, + .{ "nothaaa", .I }, + .{ "notaaaa", .J }, + .{ "noaaaaa", .K }, + .{ "naaaaaa", .L }, + .{ "incommon", .M }, + .{ "ancommon", .N }, + .{ "ab1ommon", .O }, + .{ "ab2ommon", .P }, + .{ "ab3ommon", .Q }, + .{ "ab4ommon", .R }, + .{ "ab5ommon", .S }, + .{ "ab6ommon", .T }, + .{ "ab7ommon", .U }, + .{ "ab8ommon", .V }, + .{ "ab9ommon", .W }, + .{ "abAommon", .X }, + .{ "abBommon", .Y }, + .{ "abCommon", .FZ }, + .{ "abZommon", .FA }, + .{ "abEommon", .FB }, + .{ "abFommon", .FC }, + .{ "ab10omon", .FD }, + .{ "ab11omon", .FE }, + .{ "ab12omon", .FF }, + .{ "ab13omon", .FG }, + .{ "ab14omon", .FH }, + .{ "ab15omon", .FI }, + .{ "ab16omon", .FJ }, + .{ "ab16omon1", .FH }, + .{ "samelen", .FK }, + .{ "0", .FL }, + .{ "00", .FL }, + }); + + pub const official = std.ComptimeStringMap(TestEnum2, .{ + .{ "these", .A }, + .{ "have", .B }, + .{ "naaaaaa", .L }, + .{ "noaaaaa", .K }, + .{ "notaaaa", .J }, + .{ "nothaaa", .I }, + .{ "nothiaa", .H }, + .{ "nothina", .F }, + .{ "nothinb", .G }, + .{ "nothinc", .E }, + .{ "nothing", .C }, + .{ "nothinz", .D }, + .{ "incommon", .M }, + .{ "ancommon", .N }, + .{ "ab1ommon", .O }, + .{ "ab2ommon", .P }, + .{ "ab3ommon", .Q }, + .{ "ab4ommon", .R }, + .{ "ab5ommon", .S }, + .{ "ab6ommon", .T }, + .{ "ab7ommon", .U }, + .{ "ab8ommon", .V }, + .{ "ab9ommon", .W }, + .{ "abAommon", .X }, + .{ "abBommon", .Y }, + .{ "abCommon", .FZ }, + .{ "abZommon", .FA }, + .{ "abEommon", .FB }, + .{ "abFommon", .FC }, + .{ "ab10omon", .FD }, + .{ "ab11omon", .FE }, + .{ "ab12omon", .FF }, + .{ "ab13omon", .FG }, + .{ "ab14omon", .FH }, + .{ "ab15omon", .FI }, + .{ "ab16omon", .FJ }, + .{ "samelen", .FK }, + .{ "ab16omon1", .FH }, + .{ "0", .FL }, + .{ "00", .FL }, + }); +}; + +pub fn compareString(input: []const u8) !void { + var str = try std.heap.page_allocator.dupe(u8, input); + if (TestEnum2.map.has(str) != TestEnum2.official.has(str)) { + std.debug.panic("{s} - TestEnum2.map.has(str) ({d}) != TestEnum2.official.has(str) ({d})", .{ + str, + @boolToInt(TestEnum2.map.has(str)), + @boolToInt(TestEnum2.official.has(str)), + }); + } + + std.debug.print("For string: \"{s}\" (has a match? {d})\n", .{ str, @boolToInt(TestEnum2.map.has(str)) }); + + var i: usize = 0; + var is_eql = false; + var timer = try std.time.Timer.start(); + + while (i < 99999999) : (i += 1) { + is_eql = @call(.{ .modifier = .never_inline }, TestEnum2.map.has, .{str}); + } + const new = timer.lap(); + + std.debug.print("- new {}\n", .{std.fmt.fmtDuration(new)}); + + i = 0; + while (i < 99999999) : (i += 1) { + is_eql = @call(.{ .modifier = .never_inline }, TestEnum2.official.has, .{str}); + } + + const _std = timer.lap(); + + std.debug.print("- std {}\n\n", .{std.fmt.fmtDuration(_std)}); +} + +pub fn main() anyerror!void { + try compareString("naaaaaa"); + try compareString("nothinz"); + try compareString("these"); + try compareString("incommon"); + try compareString("noMatch"); + try compareString("0"); + try compareString("00"); +} |