diff options
author | 2023-02-16 17:17:29 -0800 | |
---|---|---|
committer | 2023-02-16 17:17:29 -0800 | |
commit | 56b75dbac32233b49b33c12ce25a07c9e9083dee (patch) | |
tree | fe5726a9357a084c7d9363e902e8d8b6d25baa4b /src/string_immutable.zig | |
parent | 5007c6b218ec0e42938eac9c8a9698e313dc9952 (diff) | |
download | bun-56b75dbac32233b49b33c12ce25a07c9e9083dee.tar.gz bun-56b75dbac32233b49b33c12ce25a07c9e9083dee.tar.zst bun-56b75dbac32233b49b33c12ce25a07c9e9083dee.zip |
faster Buffer.byteLength("latin1")
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r-- | src/string_immutable.zig | 64 |
1 file changed, 28 insertions, 36 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig index e012b6e5a..5f5b943d6 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1729,48 +1729,40 @@ pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize { const latin1_last = latin1.ptr + latin1.len; if (latin1.ptr != latin1_last) { - const wrapped_len = latin1.len - (latin1.len % ascii_vector_size); + // reference the pointer directly because it improves codegen var ptr = latin1.ptr; - const latin1_vec_end = ptr + wrapped_len; - - while (ptr != latin1_vec_end) { - const vec: AsciiVector = ptr[0..ascii_vector_size].*; - if (@reduce(.Max, vec) > 127) { - const Int = u64; - const size = @sizeOf(Int); - - const bytes = [2]Int{ - @bitCast(Int, ptr[0..size].*) & 0x8080808080808080, - @bitCast(Int, ptr[size .. 2 * size].*) & 0x8080808080808080, - }; - - total_non_ascii_count += @popCount(bytes[0]) + @popCount(bytes[1]); + if (comptime Environment.enableSIMD) { + const wrapped_len = latin1.len - (latin1.len % ascii_vector_size); + const latin1_vec_end = ptr + wrapped_len; + while (ptr != latin1_vec_end) { + const vec: AsciiVector = ptr[0..ascii_vector_size].*; + const cmp = vec & @splat(ascii_vector_size, @as(u8, 0x80)); + total_non_ascii_count += @reduce(.Add, cmp); + ptr += ascii_vector_size; + } + } else { + while (@ptrToInt(ptr + 8) < @ptrToInt(latin1_last)) { + if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); + const bytes = @bitCast(u64, ptr[0..8].*) & 0x8080808080808080; + total_non_ascii_count += @popCount(bytes); + ptr += 8; } - ptr += ascii_vector_size; - } - - if (@ptrToInt(ptr + 8) < @ptrToInt(latin1_last)) { - if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); - const bytes = @bitCast(u64, ptr[0..8].*) & 0x8080808080808080; - total_non_ascii_count += @popCount(bytes); 
- ptr += 8; - } - - if (@ptrToInt(ptr + 4) < @ptrToInt(latin1_last)) { - if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); - const bytes = @bitCast(u32, ptr[0..4].*) & 0x80808080; - total_non_ascii_count += @popCount(bytes); - ptr += 4; - } + if (@ptrToInt(ptr + 4) < @ptrToInt(latin1_last)) { + if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); + const bytes = @bitCast(u32, ptr[0..4].*) & 0x80808080; + total_non_ascii_count += @popCount(bytes); + ptr += 4; + } - if (@ptrToInt(ptr + 2) < @ptrToInt(latin1_last)) { - if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); - const bytes = @bitCast(u16, ptr[0..2].*) & 0x8080; - total_non_ascii_count += @popCount(bytes); - ptr += 2; + if (@ptrToInt(ptr + 2) < @ptrToInt(latin1_last)) { + if (comptime Environment.allow_assert) std.debug.assert(@ptrToInt(ptr) <= @ptrToInt(latin1_last) and @ptrToInt(ptr) >= @ptrToInt(latin1_.ptr)); + const bytes = @bitCast(u16, ptr[0..2].*) & 0x8080; + total_non_ascii_count += @popCount(bytes); + ptr += 2; + } } while (ptr != latin1_last) { |