diff options
-rw-r--r-- | src/baby_list.zig | 10 | ||||
-rw-r--r-- | src/bun.js/webcore/streams.zig | 131 | ||||
-rw-r--r-- | src/string_immutable.zig | 512 |
3 files changed, 377 insertions, 276 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig index 2230a348f..f33216fb8 100644 --- a/src/baby_list.zig +++ b/src/baby_list.zig @@ -115,13 +115,9 @@ pub fn BabyList(comptime Type: type) type { if (comptime Type != u8) @compileError("Unsupported for type " ++ @typeName(Type)); const initial = this.len; - var list_ = this.listManaged(allocator); - { - defer this.update(list_); - const start = list_.items.len; - try list_.appendSlice(str); - strings.replaceLatin1WithUTF8(list_.items[start..list_.items.len]); - } + const old = this.listManaged(allocator); + const new = try strings.allocateLatin1IntoUTF8WithList(old, old.items.len, []const u8, str); + this.update(new); return this.len - initial; } pub fn writeUTF16(this: *@This(), allocator: std.mem.Allocator, str: []const u16) !u32 { diff --git a/src/bun.js/webcore/streams.zig b/src/bun.js/webcore/streams.zig index 654bf4b47..603694f60 100644 --- a/src/bun.js/webcore/streams.zig +++ b/src/bun.js/webcore/streams.zig @@ -244,6 +244,8 @@ pub const StreamStart = union(Tag) { as_uint8array: bool, stream: bool, }, + HTTPSResponseSink: void, + HTTPResponseSink: void, ready: void, pub const Tag = enum { @@ -251,6 +253,8 @@ pub const StreamStart = union(Tag) { err, chunk_size, ArrayBufferSink, + HTTPSResponseSink, + HTTPResponseSink, ready, }; @@ -325,6 +329,21 @@ pub const StreamStart = union(Tag) { }; } }, + .HTTPSResponseSink, .HTTPResponseSink => { + var empty = true; + var chunk_size: JSC.WebCore.Blob.SizeType = 2048; + + if (value.get(globalThis, "highWaterMark")) |chunkSize| { + empty = false; + chunk_size = @intCast(JSC.WebCore.Blob.SizeType, @maximum(256, @truncate(i51, chunkSize.toInt64()))); + } + + if (!empty) { + return .{ + .chunk_size = chunk_size, + }; + } + }, else => @compileError("Unuspported tag"), } @@ -1372,6 +1391,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { signal: Signal = .{}, pending_drain: ?*JSC.JSPromise = null, globalThis: *JSGlobalObject = undefined, + highWaterMark: Blob.SizeType = 2048, requested_end: bool = false, @@ -1475,17 +1495,34 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { return false; } - pub fn start(this: *@This(), _: StreamStart) JSC.Node.Maybe(void) { - log("start()", .{}); - + pub fn start(this: *@This(), stream_start: StreamStart) JSC.Node.Maybe(void) { if (this.res.hasResponded()) { this.done = true; this.signal.close(null); return .{ .result = {} }; } + this.buffer.len = 0; + + switch (stream_start) { + .chunk_size => |chunk_size| { + if (chunk_size > 0) { + this.highWaterMark = chunk_size; + } + }, + else => {}, + } + + var list = this.buffer.listManaged(this.allocator); + list.clearRetainingCapacity(); + list.ensureTotalCapacityPrecise(this.highWaterMark) catch return .{ .err = JSC.Node.Syscall.Error.oom }; + this.buffer.update(list); + this.done = false; this.signal.start(); + + log("start({d})", .{this.highWaterMark}); + return .{ .result = {} }; } @@ -1528,34 +1565,40 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { } const bytes = data.slice(); - + const len = @truncate(Blob.SizeType, bytes.len); log("write({d})", .{bytes.len}); if (!this.hasBackpressure()) { - if (this.buffer.len == 0) { + if (this.buffer.len == 0 and len >= this.highWaterMark) { // fast path: // - large-ish chunk // - no backpressure if (this.send(bytes)) { - this.handleWrote(bytes.len); - return .{ .owned = @truncate(Blob.SizeType, bytes.len) }; + this.handleWrote(len); + return .{ .owned = len }; } _ = this.buffer.write(this.allocator, bytes) catch { return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; }; - } else { - // kinda fast path: - // - combined chunk is large enough to flush automatically - // - no backpressure + } else if (this.buffer.len + len >= this.highWaterMark) { + // TODO: attempt to write both in a corked buffer? _ = this.buffer.write(this.allocator, bytes) catch { return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; }; - const readable = this.readableSlice(); - if (this.send(readable)) { - this.handleWrote(readable.len); - return .{ .owned = @truncate(Blob.SizeType, readable.len) }; + const slice = this.readableSlice(); + if (this.send(slice)) { + this.handleWrote(slice.len); + this.buffer.len = 0; + return .{ .owned = len }; } + } else { + // queue the data + // do not send it + _ = this.buffer.write(this.allocator, bytes) catch { + return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; + }; + return .{ .owned = len }; } this.res.onWritable(*@This(), onWritable, this); @@ -1565,7 +1608,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { }; } - return .{ .owned = @truncate(Blob.SizeType, bytes.len) }; + return .{ .owned = len }; } pub const writeBytes = write; pub fn writeLatin1(this: *@This(), data: StreamResult) StreamResult.Writable { @@ -1580,34 +1623,51 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { } const bytes = data.slice(); + const len = @truncate(Blob.SizeType, bytes.len); log("writeLatin1({d})", .{bytes.len}); if (!this.hasBackpressure()) { - if (this.buffer.len == 0 and strings.isAllASCII(bytes)) { - // fast path: - // - large-ish chunk - // - no backpressure - if (this.send(bytes)) { - this.handleWrote(bytes.len); - return .{ .owned = @truncate(Blob.SizeType, bytes.len) }; + if (this.buffer.len == 0 and len >= this.highWaterMark) { + var do_send = true; + // common case + if (strings.isAllASCII(bytes)) { + // fast path: + // - large-ish chunk + // - no backpressure + if (this.send(bytes)) { + this.handleWrote(bytes.len); + return .{ .owned = len }; + } + do_send = false; } - // we already checked it's all ascii - _ = this.buffer.write(this.allocator, bytes) catch { + _ = this.buffer.writeLatin1(this.allocator, bytes) catch { return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; }; - } else if (this.buffer.len == 0) { + + if (do_send) { + if (this.send(this.readableSlice())) { + this.handleWrote(bytes.len); + return .{ .owned = len }; + } + } + } else if (this.buffer.len + len >= this.highWaterMark) { // kinda fast path: // - combined chunk is large enough to flush automatically // - no backpressure - const reported = this.buffer.writeLatin1(this.allocator, bytes) catch { + _ = this.buffer.writeLatin1(this.allocator, bytes) catch { return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; }; const readable = this.readableSlice(); if (this.send(readable)) { this.handleWrote(readable.len); - return .{ .owned = @as(Blob.SizeType, reported) }; + return .{ .owned = len }; } + } else { + _ = this.buffer.writeLatin1(this.allocator, bytes) catch { + return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; + }; + return .{ .owned = len }; } this.res.onWritable(*@This(), onWritable, this); @@ -1617,7 +1677,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { }; } - return .{ .owned = @truncate(Blob.SizeType, bytes.len) }; + return .{ .owned = len }; } pub fn writeUTF16(this: *@This(), data: StreamResult) StreamResult.Writable { if (this.done or this.requested_end) { @@ -1643,12 +1703,15 @@ pub fn HTTPServerWritable(comptime ssl: bool) type { }; const readable = this.readableSlice(); - if (this.send(readable)) { - this.handleWrote(readable.len); - return .{ .owned = @truncate(Blob.SizeType, written) }; - } - this.res.onWritable(*@This(), onWritable, this); + if (readable.len >= this.highWaterMark) { + if (this.send(readable)) { + this.handleWrote(readable.len); + return .{ .owned = @truncate(Blob.SizeType, written) }; + } + + this.res.onWritable(*@This(), onWritable, this); + } } else { written = this.buffer.writeUTF16(this.allocator, @alignCast(2, std.mem.bytesAsSlice(u16, bytes))) catch { return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) }; diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 9e4cf3b1c..206642ae7 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1019,80 +1019,91 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list var list = list_; while (latin1.len > 0) { try list.ensureUnusedCapacity(latin1.len); - // assert our starting capcaicty is at least latin1 var buf = list.items.ptr[i..list.capacity]; inner: { - while (latin1.len >= ascii_vector_size) { - const vec: AsciiVector = latin1[0..ascii_vector_size].*; - - if (@reduce(.Max, vec) > 127) { - const Int = u64; - const size = @sizeOf(Int); - - // zig or LLVM doesn't do @ctz nicely with SIMD - if (comptime ascii_vector_size >= 8) { - { - const bytes = @bitCast(Int, latin1[0..size].*); - // https://dotat.at/@/2022-06-27-tolower-swar.html - const mask = bytes & 0x8080808080808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(latin1[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(latin1[j] < 127); + if (latin1.len >= ascii_vector_size) { + const start_ptr = @ptrToInt(buf.ptr); + const start_ptr_latin1 = @ptrToInt(latin1.ptr); + const end_ptr = @ptrToInt(latin1.ptr + latin1.len - (latin1.len % ascii_vector_size)); + + while (@ptrToInt(latin1.ptr) < end_ptr) { + const vec: AsciiVector = latin1[0..ascii_vector_size].*; + + if (@reduce(.Max, vec) > 127) { + const Int = u64; + const size = @sizeOf(Int); + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; + + // zig or LLVM doesn't do @ctz nicely with SIMD + if (comptime ascii_vector_size >= 8) { + { + const bytes = @bitCast(Int, latin1[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(latin1[first_set_byte] >= 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(latin1[j] < 127); + } } + + buf[0..size].* = @bitCast([size]u8, bytes); + buf = buf[first_set_byte..]; + latin1 = latin1[first_set_byte..]; + break :inner; } buf[0..size].* = @bitCast([size]u8, bytes); - buf = buf[first_set_byte..]; - latin1 = latin1[first_set_byte..]; - break :inner; + latin1 = latin1[size..]; + buf = buf[size..]; } - buf[0..size].* = @bitCast([size]u8, bytes); - latin1 = latin1[size..]; - buf = buf[size..]; - } - - if (comptime ascii_vector_size >= 16) { - const bytes = @bitCast(Int, latin1[0..size].*); - // https://dotat.at/@/2022-06-27-tolower-swar.html - const mask = bytes & 0x8080808080808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(latin1[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(latin1[j] < 127); + if (comptime ascii_vector_size >= 16) { + const bytes = @bitCast(Int, latin1[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(latin1[first_set_byte] >= 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(latin1[j] < 127); + } } - } - buf[0..size].* = @bitCast([size]u8, bytes); - buf = buf[first_set_byte..]; - latin1 = latin1[first_set_byte..]; - break :inner; + buf[0..size].* = @bitCast([size]u8, bytes); + buf = buf[first_set_byte..]; + latin1 = latin1[first_set_byte..]; + break :inner; + } } } - unreachable; } - } - buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*; - latin1 = latin1[ascii_vector_size..]; - buf = buf[ascii_vector_size..]; + buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*; + latin1.ptr += ascii_vector_size; + buf.ptr += ascii_vector_size; + } + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; } { const Int = u64; const size = @sizeOf(Int); - while (latin1.len >= size) { + const latin1_end_ptr = latin1.ptr + latin1.len - (latin1.len % size); + const start_ptr = @ptrToInt(buf.ptr); + const start_ptr_latin1 = @ptrToInt(latin1.ptr); + while (latin1.ptr != latin1_end_ptr) { const bytes = @bitCast(Int, latin1[0..size].*); // https://dotat.at/@/2022-06-27-tolower-swar.html const mask = bytes & 0x8080808080808080; @@ -1108,67 +1119,51 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list } buf[0..size].* = @bitCast([size]u8, bytes); - buf = buf[first_set_byte..]; - latin1 = latin1[first_set_byte..]; + buf.ptr += first_set_byte; + latin1.ptr += first_set_byte; + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; break :inner; } buf[0..size].* = @bitCast([size]u8, bytes); - latin1 = latin1[size..]; - buf = buf[size..]; + latin1.ptr += size; + buf.ptr += size; } + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; } { - const Int = u32; - const size = @sizeOf(Int); - while (latin1.len >= size) { - const bytes = @bitCast(Int, latin1[0..size].*); - // https://dotat.at/@/2022-06-27-tolower-swar.html - const mask = bytes & 0x80808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(latin1[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(latin1[j] < 127); - } - } - - buf[0..size].* = @bitCast([size]u8, bytes); - buf = buf[first_set_byte..]; - latin1 = latin1[first_set_byte..]; - break :inner; - } - - buf[0..size].* = @bitCast([size]u8, bytes); - latin1 = latin1[size..]; - buf = buf[size..]; + assert(latin1.len < 8); + const end = latin1.ptr + latin1.len; + const start_ptr = @ptrToInt(buf.ptr); + const start_ptr_latin1 = @ptrToInt(latin1.ptr); + + while (latin1.ptr != end and latin1.ptr[0] <= 127) { + buf.ptr[0] = latin1.ptr[0]; + buf.ptr += 1; + latin1.ptr += 1; } - } - while (latin1.len >= 1 and latin1[0] < 127) { - buf[0] = latin1[0]; - latin1 = latin1[1..]; - buf = buf[1..]; + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; } } - i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr); - list.items.len = i; + while (latin1.len > 0 and latin1[0] > 127) { + i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr); + list.items.len = i; - while (latin1.len > 0 and latin1[0] >= 127) { try list.ensureUnusedCapacity(2 + latin1.len); buf = list.items.ptr[i..list.capacity]; buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]); latin1 = latin1[1..]; buf = buf[2..]; - - i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr); - list.items.len = i; } + + i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr); + list.items.len = i; } return list; @@ -1296,6 +1291,60 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_ if (@reduce(.Max, vec) > 127) { if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) }; + + // zig or LLVM doesn't do @ctz nicely with SIMD + if (comptime ascii_vector_size >= 8) { + const Int = u64; + const size = @sizeOf(Int); + + { + const bytes = @bitCast(Int, latin1[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(latin1[first_set_byte] >= 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(latin1[j] < 127); + } + } + + buf[0..size].* = @bitCast([size]u8, bytes); + buf = buf[first_set_byte..]; + latin1 = latin1[first_set_byte..]; + break :inner; + } + + buf[0..size].* = @bitCast([size]u8, bytes); + latin1 = latin1[size..]; + buf = buf[size..]; + } + + if (comptime ascii_vector_size >= 16) { + const bytes = @bitCast(Int, latin1[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(latin1[first_set_byte] >= 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(latin1[j] < 127); + } + } + + buf[0..size].* = @bitCast([size]u8, bytes); + buf = buf[first_set_byte..]; + latin1 = latin1[first_set_byte..]; + break :inner; + } + } + } break; } @@ -1338,40 +1387,19 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_ } { - const Int = u32; - const size = @sizeOf(Int); - while (@minimum(buf.len, latin1.len) >= size) { - const bytes = @bitCast(Int, latin1[0..size].*); - const mask = bytes & 0x80808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) }; - - if (comptime Environment.allow_assert) { - assert(latin1[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(latin1[j] < 127); - } - } - - buf[0..size].* = @bitCast([size]u8, bytes); - buf = buf[first_set_byte..]; - latin1 = latin1[first_set_byte..]; - break :inner; - } - - buf[0..size].* = @bitCast([size]u8, bytes); - latin1 = latin1[size..]; - buf = buf[size..]; + const end = latin1.ptr + latin1.len; + assert(@ptrToInt(latin1.ptr + 8) > @ptrToInt(end)); + const start_ptr = @ptrToInt(buf.ptr); + const start_ptr_latin1 = @ptrToInt(latin1.ptr); + + while (latin1.ptr != end and latin1.ptr[0] <= 127) { + buf.ptr[0] = latin1.ptr[0]; + buf.ptr += 1; + latin1.ptr += 1; } - } - while (@minimum(buf.len, latin1.len) >= 1 and latin1[0] < 127) { - buf[0] = latin1[0]; - latin1 = latin1[1..]; - buf = buf[1..]; + buf.len -= @ptrToInt(buf.ptr) - start_ptr; + latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1; } } @@ -2479,106 +2507,111 @@ pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 { var remaining = slice; if (comptime Environment.isAarch64 or Environment.isX64) { - while (remaining.len >= ascii_vector_size) { - const vec: AsciiVector = remaining[0..ascii_vector_size].*; + if (remaining.len >= ascii_vector_size) { + const remaining_start = remaining.ptr; + const remaining_end = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size); - if (@reduce(.Max, vec) > 127) { - const Int = u64; - const size = @sizeOf(Int); - { - const bytes = @bitCast(Int, remaining[0..size].*); - // https://dotat.at/@/2022-06-27-tolower-swar.html - const mask = bytes & 0x8080808080808080; + while (remaining.ptr != remaining_end) { + const vec: AsciiVector = remaining[0..ascii_vector_size].*; - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(remaining[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(remaining[j] < 127); + if (@reduce(.Max, vec) > 127) { + const Int = u64; + const size = @sizeOf(Int); + remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start); + + { + const bytes = @bitCast(Int, remaining[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(remaining[first_set_byte] > 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(remaining[j] <= 127); + } } - } - return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); + return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); + } + remaining = remaining[size..]; } - } - { - const bytes = @bitCast(Int, remaining[size..][0..size].*); - const mask = bytes & 0x8080808080808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(remaining[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(remaining[j] < 127); + { + const bytes = @bitCast(Int, remaining[0..size].*); + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(remaining[first_set_byte] > 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(remaining[j] <= 127); + } } - } - return 8 + @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); + return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); + } } + unreachable; } - break; - } - remaining = remaining[ascii_vector_size..]; + // the more intuitive way, using slices, produces worse codegen + // specifically: it subtracts the length at the end of the loop + // we don't need to do that + // we only need to subtract the length once at the very end + remaining.ptr += ascii_vector_size; + } + remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start); } } { const Int = u64; const size = @sizeOf(Int); - while (remaining.len >= size) { - const bytes = @bitCast(Int, remaining[0..size].*); - // https://dotat.at/@/2022-06-27-tolower-swar.html - const mask = bytes & 0x8080808080808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(remaining[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(remaining[j] < 127); - } - } - - return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); - } + const remaining_start = remaining.ptr; + const remaining_end = remaining.ptr + remaining.len - (remaining.len % size); - remaining = remaining[size..]; + if (comptime Environment.isAarch64 or Environment.isX64) { + // these assertions exist more so for LLVM + assert(remaining.len < ascii_vector_size); + assert(@ptrToInt(remaining.ptr + ascii_vector_size) > @ptrToInt(remaining_end)); } - } - { - const Int = u32; - const size = @sizeOf(Int); - while (remaining.len >= size) { - const bytes = @bitCast(Int, remaining[0..size].*); - const mask = bytes & 0x80808080; - - if (mask > 0) { - const first_set_byte = @ctz(Int, mask) / 8; - if (comptime Environment.allow_assert) { - assert(remaining[first_set_byte] >= 127); - var j: usize = 0; - while (j < first_set_byte) : (j += 1) { - assert(remaining[j] < 127); + if (remaining.len >= size) { + while (remaining.ptr != remaining_end) { + const bytes = @bitCast(Int, remaining[0..size].*); + // https://dotat.at/@/2022-06-27-tolower-swar.html + const mask = bytes & 0x8080808080808080; + + if (mask > 0) { + remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start); + const first_set_byte = @ctz(Int, mask) / 8; + if (comptime Environment.allow_assert) { + assert(remaining[first_set_byte] > 127); + var j: usize = 0; + while (j < first_set_byte) : (j += 1) { + assert(remaining[j] <= 127); + } } + + return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); } - return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len); + remaining.ptr += size; } - - remaining = remaining[size..]; + remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start); } } - for (remaining) |char, i| { - if (char > 127) { - return @truncate(u32, i + (slice.len - remaining.len)); + assert(remaining.len < 8); + + for (remaining) |*char| { + if (char.* > 127) { + // try to prevent it from reading the length of the slice + return @truncate(u32, @ptrToInt(char) - @ptrToInt(slice.ptr)); } } @@ -2932,52 +2965,61 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec var remaining = slice; if (comptime Environment.isAarch64 or Environment.isX64) { - while (remaining.len >= ascii_u16_vector_size) { - const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*; - const max_value = @reduce(.Max, vec); - - if (comptime check_min) { - // by using @reduce here, we make it only do one comparison - // @reduce doesn't tell us the index though - const min_value = @reduce(.Min, vec); - if (min_value < 0x20 or max_value > 127) { - // this is really slow - // it does it element-wise for every single u8 on the vector - // instead of doing the SIMD instructions - // it removes a loop, but probably is slower in the end - const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) | - @bitCast(AsciiVectorU16U1, vec < min_u16_ascii); - const bitmask = @ptrCast(*const u16, &cmp).*; - const first = @ctz(u16, bitmask); - - return @intCast(u32, @as(u32, first) + - @intCast(u32, slice.len - remaining.len)); - } - } else if (comptime !check_min) { - if (max_value > 127) { - const cmp = vec > max_u16_ascii; - const bitmask = @ptrCast(*const u16, &cmp).*; - const first = @ctz(u16, bitmask); - - return @intCast(u32, @as(u32, first) + - @intCast(u32, slice.len - remaining.len)); + const end_ptr = remaining.ptr + remaining.len - (remaining.len % ascii_u16_vector_size); + if (remaining.len > ascii_u16_vector_size) { + const remaining_start = remaining.ptr; + while (remaining.ptr != end_ptr) { + const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*; + const max_value = @reduce(.Max, vec); + + if (comptime check_min) { + // by using @reduce here, we make it only do one comparison + // @reduce doesn't tell us the index though + const min_value = @reduce(.Min, vec); + if (min_value < 0x20 or max_value > 127) { + remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2; + + // this is really slow + // it does it element-wise for every single u8 on the vector + // instead of doing the SIMD instructions + // it removes a loop, but probably is slower in the end + const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) | + @bitCast(AsciiVectorU16U1, vec < min_u16_ascii); + const bitmask: u16 = @ptrCast(*const u16, &cmp).*; + const first = @ctz(u16, bitmask); + + return @intCast(u32, @as(u32, first) + + @intCast(u32, slice.len - remaining.len)); + } + } else if (comptime !check_min) { + if (max_value > 127) { + remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2; + + const cmp = vec > max_u16_ascii; + const bitmask = @ptrCast(*const u16, &cmp).*; + const first = @ctz(u16, bitmask); + + return @intCast(u32, @as(u32, first) + + @intCast(u32, slice.len - remaining.len)); + } } - } - remaining = remaining[ascii_u16_vector_size..]; + remaining.ptr += ascii_u16_vector_size; + } + remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2; } } if (comptime check_min) { - for (remaining) |char, i| { + for (remaining) |char| { if (char > 127 or char < 0x20) { - return @truncate(u32, i + (slice.len - remaining.len)); + return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2); } } } else { - for (remaining) |char, i| { + for (remaining) |char| { if (char > 127) { - return @truncate(u32, i + (slice.len - remaining.len)); + return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2); } } } |