aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-01-23 18:03:35 -0800
committerGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-01-23 18:03:48 -0800
commit7420cb1d7345fc2e41b63ace068c6e4428652d80 (patch)
treee2e881698ad533c348b209d125359bd448c7e358 /src
parent73961a1494c544843e1145e28d4a4fcf2e45c36a (diff)
downloadbun-7420cb1d7345fc2e41b63ace068c6e4428652d80.tar.gz
bun-7420cb1d7345fc2e41b63ace068c6e4428652d80.tar.zst
bun-7420cb1d7345fc2e41b63ace068c6e4428652d80.zip
fix test failure in aarch64 and add test for simd code path
Diffstat (limited to 'src')
-rw-r--r--src/string_immutable.zig63
1 files changed, 42 insertions, 21 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 27f9ac5d7..db00508e1 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -956,32 +956,53 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp
if (comptime Environment.allow_assert) {
std.debug.assert(input_.len <= output_.len);
}
- if (comptime !JSC.is_bindgen and Environment.isAarch64) {
- // faster on aarch64
- // but it only uses SSE2 when it could use AVX2
- // so it's better to let llvm auto-vectorize it
- JSC.WTF.copyLCharsFromUCharSource(output_.ptr, InputType, input_);
- } else {
- var output = output_;
- var input = input_;
- if (comptime Environment.allow_assert) {
- std.debug.assert(input.len <= output.len);
- }
- // https://zig.godbolt.org/z/Y1qa9PTo1
- // https://github.com/ziglang/zig/issues/11830
- // this auto-vectorizes on x64 and aarch64
- var input_ptr = input.ptr;
- var output_ptr = output.ptr;
+ var output = output_;
+ var input = input_;
+ if (comptime Environment.allow_assert) {
+ std.debug.assert(input.len <= output.len);
+ }
- const last_input_ptr = input_ptr + @min(input.len, output.len);
+ // https://zig.godbolt.org/z/9rTn1orcY
- while (last_input_ptr != input_ptr) {
- output_ptr[0] = @truncate(u8, input_ptr[0]);
- output_ptr += 1;
- input_ptr += 1;
+ const group = @as(usize, 16);
+ // the vector loop below stops at the last whole group of 16 u16 elements (16 output bytes)
+ var input_ptr = input.ptr;
+ var output_ptr = output.ptr;
+
+ if (comptime Environment.enableSIMD) {
+ const last_vector_ptr = input.ptr + (@min(input.len, output.len) & ~(group - 1));
+ while (last_vector_ptr != input_ptr) {
+ const input_vec1: @Vector(group, u16) = input_ptr[0..group].*;
+ output_ptr[0] = @truncate(u8, input_vec1[0]);
+ output_ptr[1] = @truncate(u8, input_vec1[1]);
+ output_ptr[2] = @truncate(u8, input_vec1[2]);
+ output_ptr[3] = @truncate(u8, input_vec1[3]);
+ output_ptr[4] = @truncate(u8, input_vec1[4]);
+ output_ptr[5] = @truncate(u8, input_vec1[5]);
+ output_ptr[6] = @truncate(u8, input_vec1[6]);
+ output_ptr[7] = @truncate(u8, input_vec1[7]);
+ output_ptr[8] = @truncate(u8, input_vec1[8]);
+ output_ptr[9] = @truncate(u8, input_vec1[9]);
+ output_ptr[10] = @truncate(u8, input_vec1[10]);
+ output_ptr[11] = @truncate(u8, input_vec1[11]);
+ output_ptr[12] = @truncate(u8, input_vec1[12]);
+ output_ptr[13] = @truncate(u8, input_vec1[13]);
+ output_ptr[14] = @truncate(u8, input_vec1[14]);
+ output_ptr[15] = @truncate(u8, input_vec1[15]);
+
+ output_ptr += group;
+ input_ptr += group;
}
}
+
+ const last_input_ptr = input_ptr + @min(input.len, output.len);
+
+ while (last_input_ptr != input_ptr) {
+ output_ptr[0] = @truncate(u8, input_ptr[0]);
+ output_ptr += 1;
+ input_ptr += 1;
+ }
}
const strings = @This();