aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/string_immutable.zig166
1 files changed, 86 insertions, 80 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 564a4f94d..75bfcfd70 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -649,105 +649,111 @@ pub fn utf8ByteSequenceLength(first_byte: u8) u3 {
};
}
-pub const CodepointIterator = struct {
- bytes: []const u8,
- i: usize,
- width: u3 = 0,
- c: CodePoint = 0,
-
- inline fn nextCodepointSlice(it: *CodepointIterator) []const u8 {
- @setRuntimeSafety(false);
+pub fn NewCodePointIterator(comptime CodePointType: type, comptime zeroValue: comptime_int) type {
+ return struct {
+ const Iterator = @This();
+ bytes: []const u8,
+ i: usize,
+ width: u3 = 0,
+ c: CodePointType = 0,
+
+ inline fn nextCodepointSlice(it: *Iterator) []const u8 {
+ @setRuntimeSafety(false);
- const cp_len = utf8ByteSequenceLength(it.bytes[it.i]);
- it.i += cp_len;
+ const cp_len = utf8ByteSequenceLength(it.bytes[it.i]);
+ it.i += cp_len;
- return if (!(it.i > it.bytes.len)) it.bytes[it.i - cp_len .. it.i] else "";
- }
+ return if (!(it.i > it.bytes.len)) it.bytes[it.i - cp_len .. it.i] else "";
+ }
- pub fn needsUTF8Decoding(slice: string) bool {
- var it = CodepointIterator{ .bytes = slice, .i = 0 };
+ pub fn needsUTF8Decoding(slice: string) bool {
+ var it = Iterator{ .bytes = slice, .i = 0 };
- while (true) {
- const part = it.nextCodepointSlice();
- it.width = @intCast(u3, part.len);
- @setRuntimeSafety(false);
- switch (it.width) {
- 0 => return false,
- 1 => continue,
- else => return true,
+ while (true) {
+ const part = it.nextCodepointSlice();
+ it.width = @intCast(u3, part.len);
+ @setRuntimeSafety(false);
+ switch (it.width) {
+ 0 => return false,
+ 1 => continue,
+ else => return true,
+ }
}
}
- }
- pub fn scanUntilQuotedValueOrEOF(iter: *CodepointIterator, comptime quote: CodePoint) usize {
- @setRuntimeSafety(false);
+ pub fn scanUntilQuotedValueOrEOF(iter: *Iterator, comptime quote: CodePointType) usize {
+ @setRuntimeSafety(false);
- while (iter.c > -1) {
- if (!switch (iter.nextCodepoint()) {
- quote => false,
- '\\' => brk: {
- if (iter.nextCodepoint() == quote) {
- continue;
- }
- break :brk true;
- },
- else => true,
- }) {
- return iter.i + 1;
+ while (iter.c > -1) {
+ if (!switch (iter.nextCodepoint()) {
+ quote => false,
+ '\\' => brk: {
+ if (iter.nextCodepoint() == quote) {
+ continue;
+ }
+ break :brk true;
+ },
+ else => true,
+ }) {
+ return iter.i + 1;
+ }
}
+
+ return iter.i;
}
- return iter.i;
- }
+ pub fn nextCodepoint(it: *Iterator) CodePointType {
+ const slice = it.nextCodepointSlice();
+ it.width = @intCast(u3, slice.len);
+ @setRuntimeSafety(false);
- pub fn nextCodepoint(it: *CodepointIterator) CodePoint {
- const slice = it.nextCodepointSlice();
- it.width = @intCast(u3, slice.len);
- @setRuntimeSafety(false);
+ it.c = switch (it.width) {
+ 0 => zeroValue,
+ 1 => @intCast(CodePointType, slice[0]),
+ 2 => @intCast(CodePointType, std.unicode.utf8Decode2(slice) catch unreachable),
+ 3 => @intCast(CodePointType, std.unicode.utf8Decode3(slice) catch unreachable),
+ 4 => @intCast(CodePointType, std.unicode.utf8Decode4(slice) catch unreachable),
+ else => unreachable,
+ };
- it.c = switch (it.width) {
- 0 => -1,
- 1 => @intCast(CodePoint, slice[0]),
- 2 => @intCast(CodePoint, std.unicode.utf8Decode2(slice) catch unreachable),
- 3 => @intCast(CodePoint, std.unicode.utf8Decode3(slice) catch unreachable),
- 4 => @intCast(CodePoint, std.unicode.utf8Decode4(slice) catch unreachable),
- else => unreachable,
- };
+ return it.c;
+ }
- return it.c;
- }
+ pub fn nextCodepointNoReturn(it: *Iterator) void {
+ const slice = it.nextCodepointSlice();
+ it.width = @intCast(u3, slice.len);
+ @setRuntimeSafety(false);
- pub fn nextCodepointNoReturn(it: *CodepointIterator) void {
- const slice = it.nextCodepointSlice();
- it.width = @intCast(u3, slice.len);
- @setRuntimeSafety(false);
+ it.c = switch (it.width) {
+ 0 => zeroValue,
+ 1 => @intCast(CodePointType, slice[0]),
+ 2 => @intCast(CodePointType, std.unicode.utf8Decode2(slice) catch unreachable),
+ 3 => @intCast(CodePointType, std.unicode.utf8Decode3(slice) catch unreachable),
+ 4 => @intCast(CodePointType, std.unicode.utf8Decode4(slice) catch unreachable),
+ else => unreachable,
+ };
+ }
- it.c = switch (it.width) {
- 0 => -1,
- 1 => @intCast(CodePoint, slice[0]),
- 2 => @intCast(CodePoint, std.unicode.utf8Decode2(slice) catch unreachable),
- 3 => @intCast(CodePoint, std.unicode.utf8Decode3(slice) catch unreachable),
- 4 => @intCast(CodePoint, std.unicode.utf8Decode4(slice) catch unreachable),
- else => unreachable,
- };
- }
+ /// Look ahead at the next n codepoints without advancing the iterator.
+ /// If fewer than n codepoints are available, then return the remainder of the string.
+ pub fn peek(it: *Iterator, n: usize) []const u8 {
+ const original_i = it.i;
+ defer it.i = original_i;
+
+ var end_ix = original_i;
+ var found: usize = 0;
+ while (found < n) : (found += 1) {
+ const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..];
+ end_ix += next_codepoint.len;
+ }
- /// Look ahead at the next n codepoints without advancing the iterator.
- /// If fewer than n codepoints are available, then return the remainder of the string.
- pub fn peek(it: *CodepointIterator, n: usize) []const u8 {
- const original_i = it.i;
- defer it.i = original_i;
-
- var end_ix = original_i;
- var found: usize = 0;
- while (found < n) : (found += 1) {
- const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..];
- end_ix += next_codepoint.len;
+ return it.bytes[original_i..end_ix];
}
+ };
+}
- return it.bytes[original_i..end_ix];
- }
-};
+pub const CodepointIterator = NewCodePointIterator(CodePoint, -1);
+pub const UnsignedCodepointIterator = NewCodePointIterator(u32, 0);
test "join" {
var string_list = &[_]string{ "abc", "def", "123", "hello" };