aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2023-08-21 08:31:17 -0700
committerGravatar GitHub <noreply@github.com> 2023-08-21 08:31:17 -0700
commitdef5a85d90e5102ab52e6960e8caab3c3f8ab3e8 (patch)
treeb8501bb40eb97cc019eea5f83454f8fc1c95c2e7 /src/string_immutable.zig
parent1b8f5697929812d4c7abd0edff8ad1e8f12b4e14 (diff)
downloadbun-def5a85d90e5102ab52e6960e8caab3c3f8ab3e8.tar.gz
bun-def5a85d90e5102ab52e6960e8caab3c3f8ab3e8.tar.zst
bun-def5a85d90e5102ab52e6960e8caab3c3f8ab3e8.zip
40x faster .toString('hex') (#4237)
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r--src/string_immutable.zig79
1 files changed, 76 insertions, 3 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 5ff373f03..8266e7e27 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -3602,6 +3602,14 @@ inline fn _decodeHexToBytes(destination: []u8, comptime Char: type, source: []co
return destination.len - remain.len;
}
+fn byte2hex(char: u8) u8 {
+ return switch (char) {
+ 0...9 => char + '0',
+ 10...15 => char - 10 + 'a',
+ else => unreachable,
+ };
+}
+
pub fn encodeBytesToHex(destination: []u8, source: []const u8) usize {
if (comptime Environment.allow_assert) {
std.debug.assert(destination.len > 0);
@@ -3614,10 +3622,75 @@ pub fn encodeBytesToHex(destination: []u8, source: []const u8) usize {
const to_read = to_write / 2;
- const formatter = std.fmt.fmtSliceHexLower(source[0..to_read]);
- const written = std.fmt.bufPrint(destination, "{}", .{formatter}) catch unreachable;
+ var remaining = source[0..to_read];
+ var remaining_dest = destination;
+ if (comptime Environment.enableSIMD) {
+ var remaining_end = remaining.ptr + remaining.len - (remaining.len % 16);
+ while (remaining.ptr != remaining_end) {
+ const input_chunk: @Vector(16, u8) = remaining[0..16].*;
+ const input_chunk_4: @Vector(16, u8) = input_chunk >> @as(@Vector(16, u8), @splat(@as(u8, 4)));
+ const input_chunk_15: @Vector(16, u8) = input_chunk & @as(@Vector(16, u8), @splat(@as(u8, 15)));
+
+ // This looks extremely redundant but it was the easiest way to make the compiler do the right thing
+ // the more convenient "0123456789abcdef" string produces worse codegen
+ // https://zig.godbolt.org/z/bfdracEeq
+ const lower_16 = [16]u8{
+ byte2hex(input_chunk_4[0]),
+ byte2hex(input_chunk_4[1]),
+ byte2hex(input_chunk_4[2]),
+ byte2hex(input_chunk_4[3]),
+ byte2hex(input_chunk_4[4]),
+ byte2hex(input_chunk_4[5]),
+ byte2hex(input_chunk_4[6]),
+ byte2hex(input_chunk_4[7]),
+ byte2hex(input_chunk_4[8]),
+ byte2hex(input_chunk_4[9]),
+ byte2hex(input_chunk_4[10]),
+ byte2hex(input_chunk_4[11]),
+ byte2hex(input_chunk_4[12]),
+ byte2hex(input_chunk_4[13]),
+ byte2hex(input_chunk_4[14]),
+ byte2hex(input_chunk_4[15]),
+ };
+ const upper_16 = [16]u8{
+ byte2hex(input_chunk_15[0]),
+ byte2hex(input_chunk_15[1]),
+ byte2hex(input_chunk_15[2]),
+ byte2hex(input_chunk_15[3]),
+ byte2hex(input_chunk_15[4]),
+ byte2hex(input_chunk_15[5]),
+ byte2hex(input_chunk_15[6]),
+ byte2hex(input_chunk_15[7]),
+ byte2hex(input_chunk_15[8]),
+ byte2hex(input_chunk_15[9]),
+ byte2hex(input_chunk_15[10]),
+ byte2hex(input_chunk_15[11]),
+ byte2hex(input_chunk_15[12]),
+ byte2hex(input_chunk_15[13]),
+ byte2hex(input_chunk_15[14]),
+ byte2hex(input_chunk_15[15]),
+ };
+
+ const output_chunk = std.simd.interlace(.{
+ lower_16,
+ upper_16,
+ });
+
+ remaining_dest[0..32].* = @bitCast(output_chunk);
+ remaining_dest = remaining_dest[32..];
+ remaining = remaining[16..];
+ }
+ }
+
+ for (remaining) |c| {
+ const charset = "0123456789abcdef";
+
+ const buf: [2]u8 = .{ charset[c >> 4], charset[c & 15] };
+ remaining_dest[0..2].* = buf;
+ remaining_dest = remaining_dest[2..];
+ }
- return written.len;
+ return to_read * 2;
}
test "decodeHexToBytes" {