aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/baby_list.zig10
-rw-r--r--src/bun.js/webcore/streams.zig131
-rw-r--r--src/string_immutable.zig512
3 files changed, 377 insertions, 276 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index 2230a348f..f33216fb8 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -115,13 +115,9 @@ pub fn BabyList(comptime Type: type) type {
if (comptime Type != u8)
@compileError("Unsupported for type " ++ @typeName(Type));
const initial = this.len;
- var list_ = this.listManaged(allocator);
- {
- defer this.update(list_);
- const start = list_.items.len;
- try list_.appendSlice(str);
- strings.replaceLatin1WithUTF8(list_.items[start..list_.items.len]);
- }
+ const old = this.listManaged(allocator);
+ const new = try strings.allocateLatin1IntoUTF8WithList(old, old.items.len, []const u8, str);
+ this.update(new);
return this.len - initial;
}
pub fn writeUTF16(this: *@This(), allocator: std.mem.Allocator, str: []const u16) !u32 {
diff --git a/src/bun.js/webcore/streams.zig b/src/bun.js/webcore/streams.zig
index 654bf4b47..603694f60 100644
--- a/src/bun.js/webcore/streams.zig
+++ b/src/bun.js/webcore/streams.zig
@@ -244,6 +244,8 @@ pub const StreamStart = union(Tag) {
as_uint8array: bool,
stream: bool,
},
+ HTTPSResponseSink: void,
+ HTTPResponseSink: void,
ready: void,
pub const Tag = enum {
@@ -251,6 +253,8 @@ pub const StreamStart = union(Tag) {
err,
chunk_size,
ArrayBufferSink,
+ HTTPSResponseSink,
+ HTTPResponseSink,
ready,
};
@@ -325,6 +329,21 @@ pub const StreamStart = union(Tag) {
};
}
},
+ .HTTPSResponseSink, .HTTPResponseSink => {
+ var empty = true;
+ var chunk_size: JSC.WebCore.Blob.SizeType = 2048;
+
+ if (value.get(globalThis, "highWaterMark")) |chunkSize| {
+ empty = false;
+ chunk_size = @intCast(JSC.WebCore.Blob.SizeType, @maximum(256, @truncate(i51, chunkSize.toInt64())));
+ }
+
+ if (!empty) {
+ return .{
+ .chunk_size = chunk_size,
+ };
+ }
+ },
else => @compileError("Unuspported tag"),
}
@@ -1372,6 +1391,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
signal: Signal = .{},
pending_drain: ?*JSC.JSPromise = null,
globalThis: *JSGlobalObject = undefined,
+ highWaterMark: Blob.SizeType = 2048,
requested_end: bool = false,
@@ -1475,17 +1495,34 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
return false;
}
- pub fn start(this: *@This(), _: StreamStart) JSC.Node.Maybe(void) {
- log("start()", .{});
-
+ pub fn start(this: *@This(), stream_start: StreamStart) JSC.Node.Maybe(void) {
if (this.res.hasResponded()) {
this.done = true;
this.signal.close(null);
return .{ .result = {} };
}
+ this.buffer.len = 0;
+
+ switch (stream_start) {
+ .chunk_size => |chunk_size| {
+ if (chunk_size > 0) {
+ this.highWaterMark = chunk_size;
+ }
+ },
+ else => {},
+ }
+
+ var list = this.buffer.listManaged(this.allocator);
+ list.clearRetainingCapacity();
+ list.ensureTotalCapacityPrecise(this.highWaterMark) catch return .{ .err = JSC.Node.Syscall.Error.oom };
+ this.buffer.update(list);
+
this.done = false;
this.signal.start();
+
+ log("start({d})", .{this.highWaterMark});
+
return .{ .result = {} };
}
@@ -1528,34 +1565,40 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
}
const bytes = data.slice();
-
+ const len = @truncate(Blob.SizeType, bytes.len);
log("write({d})", .{bytes.len});
if (!this.hasBackpressure()) {
- if (this.buffer.len == 0) {
+ if (this.buffer.len == 0 and len >= this.highWaterMark) {
// fast path:
// - large-ish chunk
// - no backpressure
if (this.send(bytes)) {
- this.handleWrote(bytes.len);
- return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+ this.handleWrote(len);
+ return .{ .owned = len };
}
_ = this.buffer.write(this.allocator, bytes) catch {
return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
};
- } else {
- // kinda fast path:
- // - combined chunk is large enough to flush automatically
- // - no backpressure
+ } else if (this.buffer.len + len >= this.highWaterMark) {
+ // TODO: attempt to write both in a corked buffer?
_ = this.buffer.write(this.allocator, bytes) catch {
return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
};
- const readable = this.readableSlice();
- if (this.send(readable)) {
- this.handleWrote(readable.len);
- return .{ .owned = @truncate(Blob.SizeType, readable.len) };
+ const slice = this.readableSlice();
+ if (this.send(slice)) {
+ this.handleWrote(slice.len);
+ this.buffer.len = 0;
+ return .{ .owned = len };
}
+ } else {
+ // queue the data
+ // do not send it
+ _ = this.buffer.write(this.allocator, bytes) catch {
+ return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
+ };
+ return .{ .owned = len };
}
this.res.onWritable(*@This(), onWritable, this);
@@ -1565,7 +1608,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
};
}
- return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+ return .{ .owned = len };
}
pub const writeBytes = write;
pub fn writeLatin1(this: *@This(), data: StreamResult) StreamResult.Writable {
@@ -1580,34 +1623,51 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
}
const bytes = data.slice();
+ const len = @truncate(Blob.SizeType, bytes.len);
log("writeLatin1({d})", .{bytes.len});
if (!this.hasBackpressure()) {
- if (this.buffer.len == 0 and strings.isAllASCII(bytes)) {
- // fast path:
- // - large-ish chunk
- // - no backpressure
- if (this.send(bytes)) {
- this.handleWrote(bytes.len);
- return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+ if (this.buffer.len == 0 and len >= this.highWaterMark) {
+ var do_send = true;
+ // common case
+ if (strings.isAllASCII(bytes)) {
+ // fast path:
+ // - large-ish chunk
+ // - no backpressure
+ if (this.send(bytes)) {
+ this.handleWrote(bytes.len);
+ return .{ .owned = len };
+ }
+ do_send = false;
}
- // we already checked it's all ascii
- _ = this.buffer.write(this.allocator, bytes) catch {
+ _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
};
- } else if (this.buffer.len == 0) {
+ if (do_send) {
+ const readable = this.readableSlice(); // bytes queued after Latin-1 -> UTF-8 conversion; may be longer than `bytes`
+ if (this.send(readable)) {
+ this.handleWrote(readable.len); // report what was actually sent, as the combined-chunk branch of this function does
+ return .{ .owned = len };
+ }
+ }
+ } else if (this.buffer.len + len >= this.highWaterMark) {
// kinda fast path:
// - combined chunk is large enough to flush automatically
// - no backpressure
- const reported = this.buffer.writeLatin1(this.allocator, bytes) catch {
+ _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
};
const readable = this.readableSlice();
if (this.send(readable)) {
this.handleWrote(readable.len);
- return .{ .owned = @as(Blob.SizeType, reported) };
+ return .{ .owned = len };
}
+ } else {
+ _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
+ return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
+ };
+ return .{ .owned = len };
}
this.res.onWritable(*@This(), onWritable, this);
@@ -1617,7 +1677,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
};
}
- return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+ return .{ .owned = len };
}
pub fn writeUTF16(this: *@This(), data: StreamResult) StreamResult.Writable {
if (this.done or this.requested_end) {
@@ -1643,12 +1703,15 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
};
const readable = this.readableSlice();
- if (this.send(readable)) {
- this.handleWrote(readable.len);
- return .{ .owned = @truncate(Blob.SizeType, written) };
- }
- this.res.onWritable(*@This(), onWritable, this);
+ if (readable.len >= this.highWaterMark) {
+ if (this.send(readable)) {
+ this.handleWrote(readable.len);
+ return .{ .owned = @truncate(Blob.SizeType, written) };
+ }
+
+ this.res.onWritable(*@This(), onWritable, this);
+ }
} else {
written = this.buffer.writeUTF16(this.allocator, @alignCast(2, std.mem.bytesAsSlice(u16, bytes))) catch {
return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 9e4cf3b1c..206642ae7 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1019,80 +1019,91 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
var list = list_;
while (latin1.len > 0) {
try list.ensureUnusedCapacity(latin1.len);
- // assert our starting capcaicty is at least latin1
var buf = list.items.ptr[i..list.capacity];
inner: {
- while (latin1.len >= ascii_vector_size) {
- const vec: AsciiVector = latin1[0..ascii_vector_size].*;
-
- if (@reduce(.Max, vec) > 127) {
- const Int = u64;
- const size = @sizeOf(Int);
-
- // zig or LLVM doesn't do @ctz nicely with SIMD
- if (comptime ascii_vector_size >= 8) {
- {
- const bytes = @bitCast(Int, latin1[0..size].*);
- // https://dotat.at/@/2022-06-27-tolower-swar.html
- const mask = bytes & 0x8080808080808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(latin1[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(latin1[j] < 127);
+ if (latin1.len >= ascii_vector_size) {
+ const start_ptr = @ptrToInt(buf.ptr);
+ const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+ const end_ptr = @ptrToInt(latin1.ptr + latin1.len - (latin1.len % ascii_vector_size));
+
+ while (@ptrToInt(latin1.ptr) < end_ptr) {
+ const vec: AsciiVector = latin1[0..ascii_vector_size].*;
+
+ if (@reduce(.Max, vec) > 127) {
+ const Int = u64;
+ const size = @sizeOf(Int);
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
+
+ // zig or LLVM doesn't do @ctz nicely with SIMD
+ if (comptime ascii_vector_size >= 8) {
+ {
+ const bytes = @bitCast(Int, latin1[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(latin1[first_set_byte] > 127); // the 0x80 mask only flags bytes above 127
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(latin1[j] <= 127); // 0x7F (DEL) is ASCII and may legitimately precede the first flagged byte
+ }
}
+
+ buf[0..size].* = @bitCast([size]u8, bytes);
+ buf = buf[first_set_byte..];
+ latin1 = latin1[first_set_byte..];
+ break :inner;
}
buf[0..size].* = @bitCast([size]u8, bytes);
- buf = buf[first_set_byte..];
- latin1 = latin1[first_set_byte..];
- break :inner;
+ latin1 = latin1[size..];
+ buf = buf[size..];
}
- buf[0..size].* = @bitCast([size]u8, bytes);
- latin1 = latin1[size..];
- buf = buf[size..];
- }
-
- if (comptime ascii_vector_size >= 16) {
- const bytes = @bitCast(Int, latin1[0..size].*);
- // https://dotat.at/@/2022-06-27-tolower-swar.html
- const mask = bytes & 0x8080808080808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(latin1[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(latin1[j] < 127);
+ if (comptime ascii_vector_size >= 16) {
+ const bytes = @bitCast(Int, latin1[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(latin1[first_set_byte] > 127); // the 0x80 mask only flags bytes above 127
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(latin1[j] <= 127); // 0x7F (DEL) is ASCII and may legitimately precede the first flagged byte
+ }
}
- }
- buf[0..size].* = @bitCast([size]u8, bytes);
- buf = buf[first_set_byte..];
- latin1 = latin1[first_set_byte..];
- break :inner;
+ buf[0..size].* = @bitCast([size]u8, bytes);
+ buf = buf[first_set_byte..];
+ latin1 = latin1[first_set_byte..];
+ break :inner;
+ }
}
}
-
unreachable;
}
- }
- buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
- latin1 = latin1[ascii_vector_size..];
- buf = buf[ascii_vector_size..];
+ buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
+ latin1.ptr += ascii_vector_size;
+ buf.ptr += ascii_vector_size;
+ }
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
}
{
const Int = u64;
const size = @sizeOf(Int);
- while (latin1.len >= size) {
+ const latin1_end_ptr = latin1.ptr + latin1.len - (latin1.len % size);
+ const start_ptr = @ptrToInt(buf.ptr);
+ const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+ while (latin1.ptr != latin1_end_ptr) {
const bytes = @bitCast(Int, latin1[0..size].*);
// https://dotat.at/@/2022-06-27-tolower-swar.html
const mask = bytes & 0x8080808080808080;
@@ -1108,67 +1119,51 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
}
buf[0..size].* = @bitCast([size]u8, bytes);
- buf = buf[first_set_byte..];
- latin1 = latin1[first_set_byte..];
+ buf.ptr += first_set_byte;
+ latin1.ptr += first_set_byte;
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
break :inner;
}
buf[0..size].* = @bitCast([size]u8, bytes);
- latin1 = latin1[size..];
- buf = buf[size..];
+ latin1.ptr += size;
+ buf.ptr += size;
}
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
}
{
- const Int = u32;
- const size = @sizeOf(Int);
- while (latin1.len >= size) {
- const bytes = @bitCast(Int, latin1[0..size].*);
- // https://dotat.at/@/2022-06-27-tolower-swar.html
- const mask = bytes & 0x80808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(latin1[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(latin1[j] < 127);
- }
- }
-
- buf[0..size].* = @bitCast([size]u8, bytes);
- buf = buf[first_set_byte..];
- latin1 = latin1[first_set_byte..];
- break :inner;
- }
-
- buf[0..size].* = @bitCast([size]u8, bytes);
- latin1 = latin1[size..];
- buf = buf[size..];
+ assert(latin1.len < 8);
+ const end = latin1.ptr + latin1.len;
+ const start_ptr = @ptrToInt(buf.ptr);
+ const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+ while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+ buf.ptr[0] = latin1.ptr[0];
+ buf.ptr += 1;
+ latin1.ptr += 1;
}
- }
- while (latin1.len >= 1 and latin1[0] < 127) {
- buf[0] = latin1[0];
- latin1 = latin1[1..];
- buf = buf[1..];
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
}
}
- i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
- list.items.len = i;
+ while (latin1.len > 0 and latin1[0] > 127) {
+ i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+ list.items.len = i;
- while (latin1.len > 0 and latin1[0] >= 127) {
try list.ensureUnusedCapacity(2 + latin1.len);
buf = list.items.ptr[i..list.capacity];
buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
latin1 = latin1[1..];
buf = buf[2..];
-
- i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
- list.items.len = i;
}
+
+ i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+ list.items.len = i;
}
return list;
@@ -1296,6 +1291,60 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
if (@reduce(.Max, vec) > 127) {
if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
+
+ // zig or LLVM doesn't do @ctz nicely with SIMD
+ if (comptime ascii_vector_size >= 8) {
+ const Int = u64;
+ const size = @sizeOf(Int);
+
+ {
+ const bytes = @bitCast(Int, latin1[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(latin1[first_set_byte] > 127); // the 0x80 mask only flags bytes above 127
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(latin1[j] <= 127); // 0x7F (DEL) is ASCII and may legitimately precede the first flagged byte
+ }
+ }
+
+ buf[0..size].* = @bitCast([size]u8, bytes);
+ buf = buf[first_set_byte..];
+ latin1 = latin1[first_set_byte..];
+ break :inner;
+ }
+
+ buf[0..size].* = @bitCast([size]u8, bytes);
+ latin1 = latin1[size..];
+ buf = buf[size..];
+ }
+
+ if (comptime ascii_vector_size >= 16) {
+ const bytes = @bitCast(Int, latin1[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(latin1[first_set_byte] > 127); // the 0x80 mask only flags bytes above 127
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(latin1[j] <= 127); // 0x7F (DEL) is ASCII and may legitimately precede the first flagged byte
+ }
+ }
+
+ buf[0..size].* = @bitCast([size]u8, bytes);
+ buf = buf[first_set_byte..];
+ latin1 = latin1[first_set_byte..];
+ break :inner;
+ }
+ }
+ }
break;
}
@@ -1338,40 +1387,19 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
}
{
- const Int = u32;
- const size = @sizeOf(Int);
- while (@minimum(buf.len, latin1.len) >= size) {
- const bytes = @bitCast(Int, latin1[0..size].*);
- const mask = bytes & 0x80808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
-
- if (comptime Environment.allow_assert) {
- assert(latin1[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(latin1[j] < 127);
- }
- }
-
- buf[0..size].* = @bitCast([size]u8, bytes);
- buf = buf[first_set_byte..];
- latin1 = latin1[first_set_byte..];
- break :inner;
- }
-
- buf[0..size].* = @bitCast([size]u8, bytes);
- latin1 = latin1[size..];
- buf = buf[size..];
+ const end = latin1.ptr + @minimum(buf.len, latin1.len); // keep the bound the removed loops had: never copy more than `buf` can hold
+ assert(@ptrToInt(latin1.ptr + 8) > @ptrToInt(end));
+ const start_ptr = @ptrToInt(buf.ptr);
+ const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+ while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+ buf.ptr[0] = latin1.ptr[0];
+ buf.ptr += 1;
+ latin1.ptr += 1;
}
- }
- while (@minimum(buf.len, latin1.len) >= 1 and latin1[0] < 127) {
- buf[0] = latin1[0];
- latin1 = latin1[1..];
- buf = buf[1..];
+ buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+ latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
}
}
@@ -2479,106 +2507,111 @@ pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
var remaining = slice;
if (comptime Environment.isAarch64 or Environment.isX64) {
- while (remaining.len >= ascii_vector_size) {
- const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+ if (remaining.len >= ascii_vector_size) {
+ const remaining_start = remaining.ptr;
+ const remaining_end = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size);
- if (@reduce(.Max, vec) > 127) {
- const Int = u64;
- const size = @sizeOf(Int);
- {
- const bytes = @bitCast(Int, remaining[0..size].*);
- // https://dotat.at/@/2022-06-27-tolower-swar.html
- const mask = bytes & 0x8080808080808080;
+ while (remaining.ptr != remaining_end) {
+ const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(remaining[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(remaining[j] < 127);
+ if (@reduce(.Max, vec) > 127) {
+ const Int = u64;
+ const size = @sizeOf(Int);
+ remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+
+ {
+ const bytes = @bitCast(Int, remaining[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(remaining[first_set_byte] > 127);
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(remaining[j] <= 127);
+ }
}
- }
- return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+ return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+ }
+ remaining = remaining[size..];
}
- }
- {
- const bytes = @bitCast(Int, remaining[size..][0..size].*);
- const mask = bytes & 0x8080808080808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(remaining[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(remaining[j] < 127);
+ {
+ const bytes = @bitCast(Int, remaining[0..size].*);
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(remaining[first_set_byte] > 127);
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(remaining[j] <= 127);
+ }
}
- }
- return 8 + @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+ return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+ }
}
+ unreachable;
}
- break;
- }
- remaining = remaining[ascii_vector_size..];
+ // the more intuitive way, using slices, produces worse codegen
+ // specifically: it subtracts the length at the end of the loop
+ // we don't need to do that
+ // we only need to subtract the length once at the very end
+ remaining.ptr += ascii_vector_size;
+ }
+ remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
}
}
{
const Int = u64;
const size = @sizeOf(Int);
- while (remaining.len >= size) {
- const bytes = @bitCast(Int, remaining[0..size].*);
- // https://dotat.at/@/2022-06-27-tolower-swar.html
- const mask = bytes & 0x8080808080808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(remaining[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(remaining[j] < 127);
- }
- }
-
- return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
- }
+ const remaining_start = remaining.ptr;
+ const remaining_end = remaining.ptr + remaining.len - (remaining.len % size);
- remaining = remaining[size..];
+ if (comptime Environment.isAarch64 or Environment.isX64) {
+ // these assertions exist more so for LLVM
+ assert(remaining.len < ascii_vector_size);
+ assert(@ptrToInt(remaining.ptr + ascii_vector_size) > @ptrToInt(remaining_end));
}
- }
- {
- const Int = u32;
- const size = @sizeOf(Int);
- while (remaining.len >= size) {
- const bytes = @bitCast(Int, remaining[0..size].*);
- const mask = bytes & 0x80808080;
-
- if (mask > 0) {
- const first_set_byte = @ctz(Int, mask) / 8;
- if (comptime Environment.allow_assert) {
- assert(remaining[first_set_byte] >= 127);
- var j: usize = 0;
- while (j < first_set_byte) : (j += 1) {
- assert(remaining[j] < 127);
+ if (remaining.len >= size) {
+ while (remaining.ptr != remaining_end) {
+ const bytes = @bitCast(Int, remaining[0..size].*);
+ // https://dotat.at/@/2022-06-27-tolower-swar.html
+ const mask = bytes & 0x8080808080808080;
+
+ if (mask > 0) {
+ remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+ const first_set_byte = @ctz(Int, mask) / 8;
+ if (comptime Environment.allow_assert) {
+ assert(remaining[first_set_byte] > 127);
+ var j: usize = 0;
+ while (j < first_set_byte) : (j += 1) {
+ assert(remaining[j] <= 127);
+ }
}
+
+ return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
}
- return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+ remaining.ptr += size;
}
-
- remaining = remaining[size..];
+ remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
}
}
- for (remaining) |char, i| {
- if (char > 127) {
- return @truncate(u32, i + (slice.len - remaining.len));
+ assert(remaining.len < 8);
+
+ for (remaining) |*char| {
+ if (char.* > 127) {
+ // try to prevent it from reading the length of the slice
+ return @truncate(u32, @ptrToInt(char) - @ptrToInt(slice.ptr));
}
}
@@ -2932,52 +2965,61 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
var remaining = slice;
if (comptime Environment.isAarch64 or Environment.isX64) {
- while (remaining.len >= ascii_u16_vector_size) {
- const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
- const max_value = @reduce(.Max, vec);
-
- if (comptime check_min) {
- // by using @reduce here, we make it only do one comparison
- // @reduce doesn't tell us the index though
- const min_value = @reduce(.Min, vec);
- if (min_value < 0x20 or max_value > 127) {
- // this is really slow
- // it does it element-wise for every single u8 on the vector
- // instead of doing the SIMD instructions
- // it removes a loop, but probably is slower in the end
- const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
- @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
- const bitmask = @ptrCast(*const u16, &cmp).*;
- const first = @ctz(u16, bitmask);
-
- return @intCast(u32, @as(u32, first) +
- @intCast(u32, slice.len - remaining.len));
- }
- } else if (comptime !check_min) {
- if (max_value > 127) {
- const cmp = vec > max_u16_ascii;
- const bitmask = @ptrCast(*const u16, &cmp).*;
- const first = @ctz(u16, bitmask);
-
- return @intCast(u32, @as(u32, first) +
- @intCast(u32, slice.len - remaining.len));
+ const end_ptr = remaining.ptr + remaining.len - (remaining.len % ascii_u16_vector_size);
+ if (remaining.len > ascii_u16_vector_size) {
+ const remaining_start = remaining.ptr;
+ while (remaining.ptr != end_ptr) {
+ const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+ const max_value = @reduce(.Max, vec);
+
+ if (comptime check_min) {
+ // by using @reduce here, we make it only do one comparison
+ // @reduce doesn't tell us the index though
+ const min_value = @reduce(.Min, vec);
+ if (min_value < 0x20 or max_value > 127) {
+ remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+ // this is really slow
+ // it does it element-wise for every single u8 on the vector
+ // instead of doing the SIMD instructions
+ // it removes a loop, but probably is slower in the end
+ const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
+ @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
+ const bitmask: u16 = @ptrCast(*const u16, &cmp).*;
+ const first = @ctz(u16, bitmask);
+
+ return @intCast(u32, @as(u32, first) +
+ @intCast(u32, slice.len - remaining.len));
+ }
+ } else if (comptime !check_min) {
+ if (max_value > 127) {
+ remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+ const cmp = vec > max_u16_ascii;
+ const bitmask = @ptrCast(*const u16, &cmp).*;
+ const first = @ctz(u16, bitmask);
+
+ return @intCast(u32, @as(u32, first) +
+ @intCast(u32, slice.len - remaining.len));
+ }
}
- }
- remaining = remaining[ascii_u16_vector_size..];
+ remaining.ptr += ascii_u16_vector_size;
+ }
+ remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
}
}
if (comptime check_min) {
for (remaining) |char, i| {
if (char > 127 or char < 0x20) {
- return @truncate(u32, i + (slice.len - remaining.len));
+ return @truncate(u32, i + (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2); // offset of `remaining` in u16 units plus the index inside it
}
}
} else {
for (remaining) |char, i| {
if (char > 127) {
- return @truncate(u32, i + (slice.len - remaining.len));
+ return @truncate(u32, i + (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2); // offset of `remaining` in u16 units plus the index inside it
}
}
}