1 files changed, 277 insertions, 235 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 9e4cf3b1c..206642ae7 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1019,80 +1019,91 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
     var list = list_;
     while (latin1.len > 0) {
         try list.ensureUnusedCapacity(latin1.len);
-        // assert our starting capcaicty is at least latin1
         var buf = list.items.ptr[i..list.capacity];
 
         inner: {
-            while (latin1.len >= ascii_vector_size) {
-                const vec: AsciiVector = latin1[0..ascii_vector_size].*;
-
-                if (@reduce(.Max, vec) > 127) {
-                    const Int = u64;
-                    const size = @sizeOf(Int);
-
-                    // zig or LLVM doesn't do @ctz nicely with SIMD
-                    if (comptime ascii_vector_size >= 8) {
-                        {
-                            const bytes = @bitCast(Int, latin1[0..size].*);
-                            // https://dotat.at/@/2022-06-27-tolower-swar.html
-                            const mask = bytes & 0x8080808080808080;
-
-                            if (mask > 0) {
-                                const first_set_byte = @ctz(Int, mask) / 8;
-                                if (comptime Environment.allow_assert) {
-                                    assert(latin1[first_set_byte] >= 127);
-                                    var j: usize = 0;
-                                    while (j < first_set_byte) : (j += 1) {
-                                        assert(latin1[j] < 127);
+            if (latin1.len >= ascii_vector_size) {
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+                const end_ptr = @ptrToInt(latin1.ptr + latin1.len - (latin1.len % ascii_vector_size));
+
+                while (@ptrToInt(latin1.ptr) < end_ptr) {
+                    const vec: AsciiVector = latin1[0..ascii_vector_size].*;
+
+                    if (@reduce(.Max, vec) > 127) {
+                        const Int = u64;
+                        const size = @sizeOf(Int);
+                        buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                        latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
+
+                        // zig or LLVM doesn't do @ctz nicely with SIMD
+                        if (comptime ascii_vector_size >= 8) {
+                            {
+                                const bytes = @bitCast(Int, latin1[0..size].*);
+                                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                                const mask = bytes & 0x8080808080808080;
+
+                                if (mask > 0) {
+                                    const first_set_byte = @ctz(Int, mask) / 8;
+                                    if (comptime Environment.allow_assert) {
+                                        assert(latin1[first_set_byte] >= 127);
+                                        var j: usize = 0;
+                                        while (j < first_set_byte) : (j += 1) {
+                                            assert(latin1[j] < 127);
+                                        }
                                     }
+
+                                    buf[0..size].* = @bitCast([size]u8, bytes);
+                                    buf = buf[first_set_byte..];
+                                    latin1 = latin1[first_set_byte..];
+                                    break :inner;
                                 }
 
                                 buf[0..size].* = @bitCast([size]u8, bytes);
-                                buf = buf[first_set_byte..];
-                                latin1 = latin1[first_set_byte..];
-                                break :inner;
+                                latin1 = latin1[size..];
+                                buf = buf[size..];
                             }
 
-                            buf[0..size].* = @bitCast([size]u8, bytes);
-                            latin1 = latin1[size..];
-                            buf = buf[size..];
-                        }
-
-                        if (comptime ascii_vector_size >= 16) {
-                            const bytes = @bitCast(Int, latin1[0..size].*);
-                            // https://dotat.at/@/2022-06-27-tolower-swar.html
-                            const mask = bytes & 0x8080808080808080;
-
-                            if (mask > 0) {
-                                const first_set_byte = @ctz(Int, mask) / 8;
-                                if (comptime Environment.allow_assert) {
-                                    assert(latin1[first_set_byte] >= 127);
-                                    var j: usize = 0;
-                                    while (j < first_set_byte) : (j += 1) {
-                                        assert(latin1[j] < 127);
+                            if (comptime ascii_vector_size >= 16) {
+                                const bytes = @bitCast(Int, latin1[0..size].*);
+                                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                                const mask = bytes & 0x8080808080808080;
+
+                                if (mask > 0) {
+                                    const first_set_byte = @ctz(Int, mask) / 8;
+                                    if (comptime Environment.allow_assert) {
+                                        assert(latin1[first_set_byte] >= 127);
+                                        var j: usize = 0;
+                                        while (j < first_set_byte) : (j += 1) {
+                                            assert(latin1[j] < 127);
+                                        }
                                     }
-                                }
 
-                                buf[0..size].* = @bitCast([size]u8, bytes);
-                                buf = buf[first_set_byte..];
-                                latin1 = latin1[first_set_byte..];
-                                break :inner;
+                                    buf[0..size].* = @bitCast([size]u8, bytes);
+                                    buf = buf[first_set_byte..];
+                                    latin1 = latin1[first_set_byte..];
+                                    break :inner;
+                                }
                             }
                         }
-
                         unreachable;
                     }
-                }
 
-                buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
-                latin1 = latin1[ascii_vector_size..];
-                buf = buf[ascii_vector_size..];
+                    buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
+                    latin1.ptr += ascii_vector_size;
+                    buf.ptr += ascii_vector_size;
+                }
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
 
             {
                 const Int = u64;
                 const size = @sizeOf(Int);
-                while (latin1.len >= size) {
+                const latin1_end_ptr = latin1.ptr + latin1.len - (latin1.len % size);
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+                while (latin1.ptr != latin1_end_ptr) {
                     const bytes = @bitCast(Int, latin1[0..size].*);
                     // https://dotat.at/@/2022-06-27-tolower-swar.html
                     const mask = bytes & 0x8080808080808080;
@@ -1108,67 +1119,51 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
                         }
 
                         buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
+                        buf.ptr += first_set_byte;
+                        latin1.ptr += first_set_byte;
+                        buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                        latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
                         break :inner;
                     }
 
                     buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                    latin1.ptr += size;
+                    buf.ptr += size;
                 }
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
 
             {
-                const Int = u32;
-                const size = @sizeOf(Int);
-                while (latin1.len >= size) {
-                    const bytes = @bitCast(Int, latin1[0..size].*);
-                    // https://dotat.at/@/2022-06-27-tolower-swar.html
-                    const mask = bytes & 0x80808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(latin1[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(latin1[j] < 127);
-                            }
-                        }
-
-                        buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
-                        break :inner;
-                    }
-
-                    buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                assert(latin1.len < 8);
+                const end = latin1.ptr + latin1.len;
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+                while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+                    buf.ptr[0] = latin1.ptr[0];
+                    buf.ptr += 1;
+                    latin1.ptr += 1;
                 }
-            }
 
-            while (latin1.len >= 1 and latin1[0] < 127) {
-                buf[0] = latin1[0];
-                latin1 = latin1[1..];
-                buf = buf[1..];
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
         }
 
-        i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
-        list.items.len = i;
+        while (latin1.len > 0 and latin1[0] > 127) {
+            i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+            list.items.len = i;
 
-        while (latin1.len > 0 and latin1[0] >= 127) {
             try list.ensureUnusedCapacity(2 + latin1.len);
             buf = list.items.ptr[i..list.capacity];
             buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
             latin1 = latin1[1..];
             buf = buf[2..];
-
-            i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
-            list.items.len = i;
         }
+
+        i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+        list.items.len = i;
     }
 
     return list;
@@ -1296,6 +1291,60 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
 
                 if (@reduce(.Max, vec) > 127) {
                     if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
+
+                    // zig or LLVM doesn't do @ctz nicely with SIMD
+                    if (comptime ascii_vector_size >= 8) {
+                        const Int = u64;
+                        const size = @sizeOf(Int);
+
+                        {
+                            const bytes = @bitCast(Int, latin1[0..size].*);
+                            // https://dotat.at/@/2022-06-27-tolower-swar.html
+                            const mask = bytes & 0x8080808080808080;
+
+                            if (mask > 0) {
+                                const first_set_byte = @ctz(Int, mask) / 8;
+                                if (comptime Environment.allow_assert) {
+                                    assert(latin1[first_set_byte] >= 127);
+                                    var j: usize = 0;
+                                    while (j < first_set_byte) : (j += 1) {
+                                        assert(latin1[j] < 127);
+                                    }
+                                }
+
+                                buf[0..size].* = @bitCast([size]u8, bytes);
+                                buf = buf[first_set_byte..];
+                                latin1 = latin1[first_set_byte..];
+                                break :inner;
+                            }
+
+                            buf[0..size].* = @bitCast([size]u8, bytes);
+                            latin1 = latin1[size..];
+                            buf = buf[size..];
+                        }
+
+                        if (comptime ascii_vector_size >= 16) {
+                            const bytes = @bitCast(Int, latin1[0..size].*);
+                            // https://dotat.at/@/2022-06-27-tolower-swar.html
+                            const mask = bytes & 0x8080808080808080;
+
+                            if (mask > 0) {
+                                const first_set_byte = @ctz(Int, mask) / 8;
+                                if (comptime Environment.allow_assert) {
+                                    assert(latin1[first_set_byte] >= 127);
+                                    var j: usize = 0;
+                                    while (j < first_set_byte) : (j += 1) {
+                                        assert(latin1[j] < 127);
+                                    }
+                                }
+
+                                buf[0..size].* = @bitCast([size]u8, bytes);
+                                buf = buf[first_set_byte..];
+                                latin1 = latin1[first_set_byte..];
+                                break :inner;
+                            }
+                        }
+                    }
                     break;
                 }
 
@@ -1338,40 +1387,19 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
             }
 
             {
-                const Int = u32;
-                const size = @sizeOf(Int);
-                while (@minimum(buf.len, latin1.len) >= size) {
-                    const bytes = @bitCast(Int, latin1[0..size].*);
-                    const mask = bytes & 0x80808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
-
-                        if (comptime Environment.allow_assert) {
-                            assert(latin1[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(latin1[j] < 127);
-                            }
-                        }
-
-                        buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
-                        break :inner;
-                    }
-
-                    buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                const end = latin1.ptr + latin1.len;
+                assert(@ptrToInt(latin1.ptr + 8) > @ptrToInt(end));
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+                while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+                    buf.ptr[0] = latin1.ptr[0];
+                    buf.ptr += 1;
+                    latin1.ptr += 1;
                 }
-            }
 
-            while (@minimum(buf.len, latin1.len) >= 1 and latin1[0] < 127) {
-                buf[0] = latin1[0];
-                latin1 = latin1[1..];
-                buf = buf[1..];
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
         }
 
@@ -2479,106 +2507,111 @@ pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
     var remaining = slice;
 
     if (comptime Environment.isAarch64 or Environment.isX64) {
-        while (remaining.len >= ascii_vector_size) {
-            const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+        if (remaining.len >= ascii_vector_size) {
+            const remaining_start = remaining.ptr;
+            const remaining_end = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size);
 
-            if (@reduce(.Max, vec) > 127) {
-                const Int = u64;
-                const size = @sizeOf(Int);
-                {
-                    const bytes = @bitCast(Int, remaining[0..size].*);
-                    // https://dotat.at/@/2022-06-27-tolower-swar.html
-                    const mask = bytes & 0x8080808080808080;
+            while (remaining.ptr != remaining_end) {
+                const vec: AsciiVector = remaining[0..ascii_vector_size].*;
 
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(remaining[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(remaining[j] < 127);
+                if (@reduce(.Max, vec) > 127) {
+                    const Int = u64;
+                    const size = @sizeOf(Int);
+                    remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+
+                    {
+                        const bytes = @bitCast(Int, remaining[0..size].*);
+                        // https://dotat.at/@/2022-06-27-tolower-swar.html
+                        const mask = bytes & 0x8080808080808080;
+
+                        if (mask > 0) {
+                            const first_set_byte = @ctz(Int, mask) / 8;
+                            if (comptime Environment.allow_assert) {
+                                assert(remaining[first_set_byte] > 127);
+                                var j: usize = 0;
+                                while (j < first_set_byte) : (j += 1) {
+                                    assert(remaining[j] <= 127);
+                                }
                             }
-                        }
 
-                        return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                            return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                        }
+                        remaining = remaining[size..];
                     }
-                }
-                {
-                    const bytes = @bitCast(Int, remaining[size..][0..size].*);
-                    const mask = bytes & 0x8080808080808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(remaining[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(remaining[j] < 127);
+                    {
+                        const bytes = @bitCast(Int, remaining[0..size].*);
+                        const mask = bytes & 0x8080808080808080;
+
+                        if (mask > 0) {
+                            const first_set_byte = @ctz(Int, mask) / 8;
+                            if (comptime Environment.allow_assert) {
+                                assert(remaining[first_set_byte] > 127);
+                                var j: usize = 0;
+                                while (j < first_set_byte) : (j += 1) {
+                                    assert(remaining[j] <= 127);
+                                }
                             }
-                        }
 
-                        return 8 + @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                            return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                        }
                     }
+                    unreachable;
                 }
-                break;
-            }
 
-            remaining = remaining[ascii_vector_size..];
+                // the more intuitive way, using slices, produces worse codegen
+                // specifically: it subtracts the length at the end of the loop
+                // we don't need to do that
+                // we only need to subtract the length once at the very end
+                remaining.ptr += ascii_vector_size;
+            }
+            remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
         }
     }
 
     {
         const Int = u64;
         const size = @sizeOf(Int);
-        while (remaining.len >= size) {
-            const bytes = @bitCast(Int, remaining[0..size].*);
-            // https://dotat.at/@/2022-06-27-tolower-swar.html
-            const mask = bytes & 0x8080808080808080;
-
-            if (mask > 0) {
-                const first_set_byte = @ctz(Int, mask) / 8;
-                if (comptime Environment.allow_assert) {
-                    assert(remaining[first_set_byte] >= 127);
-                    var j: usize = 0;
-                    while (j < first_set_byte) : (j += 1) {
-                        assert(remaining[j] < 127);
-                    }
-                }
-
-                return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
-            }
+        const remaining_start = remaining.ptr;
+        const remaining_end = remaining.ptr + remaining.len - (remaining.len % size);
 
-            remaining = remaining[size..];
+        if (comptime Environment.isAarch64 or Environment.isX64) {
+            // these assertions exist more so for LLVM
+            assert(remaining.len < ascii_vector_size);
+            assert(@ptrToInt(remaining.ptr + ascii_vector_size) > @ptrToInt(remaining_end));
         }
-    }
 
-    {
-        const Int = u32;
-        const size = @sizeOf(Int);
-        while (remaining.len >= size) {
-            const bytes = @bitCast(Int, remaining[0..size].*);
-            const mask = bytes & 0x80808080;
-
-            if (mask > 0) {
-                const first_set_byte = @ctz(Int, mask) / 8;
-                if (comptime Environment.allow_assert) {
-                    assert(remaining[first_set_byte] >= 127);
-                    var j: usize = 0;
-                    while (j < first_set_byte) : (j += 1) {
-                        assert(remaining[j] < 127);
+        if (remaining.len >= size) {
+            while (remaining.ptr != remaining_end) {
+                const bytes = @bitCast(Int, remaining[0..size].*);
+                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                const mask = bytes & 0x8080808080808080;
+
+                if (mask > 0) {
+                    remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+                    const first_set_byte = @ctz(Int, mask) / 8;
+                    if (comptime Environment.allow_assert) {
+                        assert(remaining[first_set_byte] > 127);
+                        var j: usize = 0;
+                        while (j < first_set_byte) : (j += 1) {
+                            assert(remaining[j] <= 127);
+                        }
                     }
+
+                    return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
                 }
 
-                return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                remaining.ptr += size;
             }
-
-            remaining = remaining[size..];
+            remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
         }
     }
 
-    for (remaining) |char, i| {
-        if (char > 127) {
-            return @truncate(u32, i + (slice.len - remaining.len));
+    assert(remaining.len < 8);
+
+    for (remaining) |*char| {
+        if (char.* > 127) {
+            // try to prevent it from reading the length of the slice
+            return @truncate(u32, @ptrToInt(char) - @ptrToInt(slice.ptr));
         }
     }
 
@@ -2932,52 +2965,61 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
     var remaining = slice;
 
     if (comptime Environment.isAarch64 or Environment.isX64) {
-        while (remaining.len >= ascii_u16_vector_size) {
-            const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
-            const max_value = @reduce(.Max, vec);
-
-            if (comptime check_min) {
-                // by using @reduce here, we make it only do one comparison
-                // @reduce doesn't tell us the index though
-                const min_value = @reduce(.Min, vec);
-                if (min_value < 0x20 or max_value > 127) {
-                    // this is really slow
-                    // it does it element-wise for every single u8 on the vector
-                    // instead of doing the SIMD instructions
-                    // it removes a loop, but probably is slower in the end
-                    const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
-                        @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
-                    const bitmask = @ptrCast(*const u16, &cmp).*;
-                    const first = @ctz(u16, bitmask);
-
-                    return @intCast(u32, @as(u32, first) +
-                        @intCast(u32, slice.len - remaining.len));
-                }
-            } else if (comptime !check_min) {
-                if (max_value > 127) {
-                    const cmp = vec > max_u16_ascii;
-                    const bitmask = @ptrCast(*const u16, &cmp).*;
-                    const first = @ctz(u16, bitmask);
-
-                    return @intCast(u32, @as(u32, first) +
-                        @intCast(u32, slice.len - remaining.len));
+        const end_ptr = remaining.ptr + remaining.len - (remaining.len % ascii_u16_vector_size);
+        if (remaining.len > ascii_u16_vector_size) {
+            const remaining_start = remaining.ptr;
+            while (remaining.ptr != end_ptr) {
+                const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+                const max_value = @reduce(.Max, vec);
+
+                if (comptime check_min) {
+                    // by using @reduce here, we make it only do one comparison
+                    // @reduce doesn't tell us the index though
+                    const min_value = @reduce(.Min, vec);
+                    if (min_value < 0x20 or max_value > 127) {
+                        remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+                        // this is really slow
+                        // it does it element-wise for every single u8 on the vector
+                        // instead of doing the SIMD instructions
+                        // it removes a loop, but probably is slower in the end
+                        const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
+                            @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
+                        const bitmask: u16 = @ptrCast(*const u16, &cmp).*;
+                        const first = @ctz(u16, bitmask);
+
+                        return @intCast(u32, @as(u32, first) +
+                            @intCast(u32, slice.len - remaining.len));
+                    }
+                } else if (comptime !check_min) {
+                    if (max_value > 127) {
+                        remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+                        const cmp = vec > max_u16_ascii;
+                        const bitmask = @ptrCast(*const u16, &cmp).*;
+                        const first = @ctz(u16, bitmask);
+
+                        return @intCast(u32, @as(u32, first) +
+                            @intCast(u32, slice.len - remaining.len));
+                    }
                 }
-            }
 
-            remaining = remaining[ascii_u16_vector_size..];
+                remaining.ptr += ascii_u16_vector_size;
+            }
+            remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
         }
     }
 
     if (comptime check_min) {
-        for (remaining) |char, i| {
+        for (remaining) |char| {
             if (char > 127 or char < 0x20) {
-                return @truncate(u32, i + (slice.len - remaining.len));
+                return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2);
             }
         }
     } else {
-        for (remaining) |char, i| {
+        for (remaining) |char| {
             if (char > 127) {
-                return @truncate(u32, i + (slice.len - remaining.len));
+                return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2);
             }
         }
     }