3 files changed, 377 insertions, 276 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index 2230a348f..f33216fb8 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -115,13 +115,9 @@ pub fn BabyList(comptime Type: type) type {
             if (comptime Type != u8)
                 @compileError("Unsupported for type " ++ @typeName(Type));
             const initial = this.len;
-            var list_ = this.listManaged(allocator);
-            {
-                defer this.update(list_);
-                const start = list_.items.len;
-                try list_.appendSlice(str);
-                strings.replaceLatin1WithUTF8(list_.items[start..list_.items.len]);
-            }
+            const old = this.listManaged(allocator);
+            const new = try strings.allocateLatin1IntoUTF8WithList(old, old.items.len, []const u8, str);
+            this.update(new);
             return this.len - initial;
         }
         pub fn writeUTF16(this: *@This(), allocator: std.mem.Allocator, str: []const u16) !u32 {
diff --git a/src/bun.js/webcore/streams.zig b/src/bun.js/webcore/streams.zig
index 654bf4b47..603694f60 100644
--- a/src/bun.js/webcore/streams.zig
+++ b/src/bun.js/webcore/streams.zig
@@ -244,6 +244,8 @@ pub const StreamStart = union(Tag) {
         as_uint8array: bool,
         stream: bool,
     },
+    HTTPSResponseSink: void,
+    HTTPResponseSink: void,
     ready: void,
 
     pub const Tag = enum {
@@ -251,6 +253,8 @@ pub const StreamStart = union(Tag) {
         err,
         chunk_size,
         ArrayBufferSink,
+        HTTPSResponseSink,
+        HTTPResponseSink,
         ready,
     };
 
@@ -325,6 +329,21 @@ pub const StreamStart = union(Tag) {
                     };
                 }
             },
+            .HTTPSResponseSink, .HTTPResponseSink => {
+                var empty = true;
+                var chunk_size: JSC.WebCore.Blob.SizeType = 2048;
+
+                if (value.get(globalThis, "highWaterMark")) |chunkSize| {
+                    empty = false;
+                    chunk_size = @intCast(JSC.WebCore.Blob.SizeType, @maximum(256, @truncate(i51, chunkSize.toInt64())));
+                }
+
+                if (!empty) {
+                    return .{
+                        .chunk_size = chunk_size,
+                    };
+                }
+            },
             else => @compileError("Unuspported tag"),
         }
 
@@ -1372,6 +1391,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
         signal: Signal = .{},
         pending_drain: ?*JSC.JSPromise = null,
         globalThis: *JSGlobalObject = undefined,
+        highWaterMark: Blob.SizeType = 2048,
 
         requested_end: bool = false,
 
@@ -1475,17 +1495,34 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
             return false;
         }
 
-        pub fn start(this: *@This(), _: StreamStart) JSC.Node.Maybe(void) {
-            log("start()", .{});
-
+        pub fn start(this: *@This(), stream_start: StreamStart) JSC.Node.Maybe(void) {
             if (this.res.hasResponded()) {
                 this.done = true;
                 this.signal.close(null);
                 return .{ .result = {} };
             }
 
+            this.buffer.len = 0;
+
+            switch (stream_start) {
+                .chunk_size => |chunk_size| {
+                    if (chunk_size > 0) {
+                        this.highWaterMark = chunk_size;
+                    }
+                },
+                else => {},
+            }
+
+            var list = this.buffer.listManaged(this.allocator);
+            list.clearRetainingCapacity();
+            list.ensureTotalCapacityPrecise(this.highWaterMark) catch return .{ .err = JSC.Node.Syscall.Error.oom };
+            this.buffer.update(list);
+
             this.done = false;
             this.signal.start();
+
+            log("start({d})", .{this.highWaterMark});
+
             return .{ .result = {} };
         }
 
@@ -1528,34 +1565,40 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
             }
 
             const bytes = data.slice();
-
+            const len = @truncate(Blob.SizeType, bytes.len);
             log("write({d})", .{bytes.len});
 
             if (!this.hasBackpressure()) {
-                if (this.buffer.len == 0) {
+                if (this.buffer.len == 0 and len >= this.highWaterMark) {
                     // fast path:
                     // - large-ish chunk
                     // - no backpressure
                     if (this.send(bytes)) {
-                        this.handleWrote(bytes.len);
-                        return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+                        this.handleWrote(len);
+                        return .{ .owned = len };
                     }
 
                     _ = this.buffer.write(this.allocator, bytes) catch {
                         return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
                     };
-                } else {
-                    // kinda fast path:
-                    // - combined chunk is large enough to flush automatically
-                    // - no backpressure
+                } else if (this.buffer.len + len >= this.highWaterMark) {
+                    // TODO: attempt to write both in a corked buffer?
                     _ = this.buffer.write(this.allocator, bytes) catch {
                         return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
                     };
-                    const readable = this.readableSlice();
-                    if (this.send(readable)) {
-                        this.handleWrote(readable.len);
-                        return .{ .owned = @truncate(Blob.SizeType, readable.len) };
+                    const slice = this.readableSlice();
+                    if (this.send(slice)) {
+                        this.handleWrote(slice.len);
+                        this.buffer.len = 0;
+                        return .{ .owned = len };
                     }
+                } else {
+                    // queue the data
+                    // do not send it
+                    _ = this.buffer.write(this.allocator, bytes) catch {
+                        return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
+                    };
+                    return .{ .owned = len };
                 }
 
                 this.res.onWritable(*@This(), onWritable, this);
@@ -1565,7 +1608,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
                 };
             }
 
-            return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+            return .{ .owned = len };
         }
         pub const writeBytes = write;
         pub fn writeLatin1(this: *@This(), data: StreamResult) StreamResult.Writable {
@@ -1580,34 +1623,51 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
             }
 
             const bytes = data.slice();
+            const len = @truncate(Blob.SizeType, bytes.len);
             log("writeLatin1({d})", .{bytes.len});
 
             if (!this.hasBackpressure()) {
-                if (this.buffer.len == 0 and strings.isAllASCII(bytes)) {
-                    // fast path:
-                    // - large-ish chunk
-                    // - no backpressure
-                    if (this.send(bytes)) {
-                        this.handleWrote(bytes.len);
-                        return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+                if (this.buffer.len == 0 and len >= this.highWaterMark) {
+                    var do_send = true;
+                    // common case
+                    if (strings.isAllASCII(bytes)) {
+                        // fast path:
+                        // - large-ish chunk
+                        // - no backpressure
+                        if (this.send(bytes)) {
+                            this.handleWrote(bytes.len);
+                            return .{ .owned = len };
+                        }
+                        do_send = false;
                     }
 
-                    // we already checked it's all ascii
-                    _ = this.buffer.write(this.allocator, bytes) catch {
+                    _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
                         return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
                     };
-                } else if (this.buffer.len == 0) {
+
+                    if (do_send) {
+                        if (this.send(this.readableSlice())) {
+                            this.handleWrote(bytes.len);
+                            return .{ .owned = len };
+                        }
+                    }
+                } else if (this.buffer.len + len >= this.highWaterMark) {
                     // kinda fast path:
                     // - combined chunk is large enough to flush automatically
                     // - no backpressure
-                    const reported = this.buffer.writeLatin1(this.allocator, bytes) catch {
+                    _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
                         return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
                     };
                     const readable = this.readableSlice();
                     if (this.send(readable)) {
                         this.handleWrote(readable.len);
-                        return .{ .owned = @as(Blob.SizeType, reported) };
+                        return .{ .owned = len };
                     }
+                } else {
+                    _ = this.buffer.writeLatin1(this.allocator, bytes) catch {
+                        return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
+                    };
+                    return .{ .owned = len };
                 }
 
                 this.res.onWritable(*@This(), onWritable, this);
@@ -1617,7 +1677,7 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
                 };
             }
 
-            return .{ .owned = @truncate(Blob.SizeType, bytes.len) };
+            return .{ .owned = len };
         }
         pub fn writeUTF16(this: *@This(), data: StreamResult) StreamResult.Writable {
             if (this.done or this.requested_end) {
@@ -1643,12 +1703,15 @@ pub fn HTTPServerWritable(comptime ssl: bool) type {
                 };
 
                 const readable = this.readableSlice();
-                if (this.send(readable)) {
-                    this.handleWrote(readable.len);
-                    return .{ .owned = @truncate(Blob.SizeType, written) };
-                }
 
-                this.res.onWritable(*@This(), onWritable, this);
+                if (readable.len >= this.highWaterMark) {
+                    if (this.send(readable)) {
+                        this.handleWrote(readable.len);
+                        return .{ .owned = @truncate(Blob.SizeType, written) };
+                    }
+
+                    this.res.onWritable(*@This(), onWritable, this);
+                }
             } else {
                 written = this.buffer.writeUTF16(this.allocator, @alignCast(2, std.mem.bytesAsSlice(u16, bytes))) catch {
                     return .{ .err = JSC.Node.Syscall.Error.fromCode(.NOMEM, .write) };
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 9e4cf3b1c..206642ae7 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1019,80 +1019,91 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
     var list = list_;
     while (latin1.len > 0) {
         try list.ensureUnusedCapacity(latin1.len);
-        // assert our starting capcaicty is at least latin1
         var buf = list.items.ptr[i..list.capacity];
 
         inner: {
-            while (latin1.len >= ascii_vector_size) {
-                const vec: AsciiVector = latin1[0..ascii_vector_size].*;
-
-                if (@reduce(.Max, vec) > 127) {
-                    const Int = u64;
-                    const size = @sizeOf(Int);
-
-                    // zig or LLVM doesn't do @ctz nicely with SIMD
-                    if (comptime ascii_vector_size >= 8) {
-                        {
-                            const bytes = @bitCast(Int, latin1[0..size].*);
-                            // https://dotat.at/@/2022-06-27-tolower-swar.html
-                            const mask = bytes & 0x8080808080808080;
-
-                            if (mask > 0) {
-                                const first_set_byte = @ctz(Int, mask) / 8;
-                                if (comptime Environment.allow_assert) {
-                                    assert(latin1[first_set_byte] >= 127);
-                                    var j: usize = 0;
-                                    while (j < first_set_byte) : (j += 1) {
-                                        assert(latin1[j] < 127);
+            if (latin1.len >= ascii_vector_size) {
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+                const end_ptr = @ptrToInt(latin1.ptr + latin1.len - (latin1.len % ascii_vector_size));
+
+                while (@ptrToInt(latin1.ptr) < end_ptr) {
+                    const vec: AsciiVector = latin1[0..ascii_vector_size].*;
+
+                    if (@reduce(.Max, vec) > 127) {
+                        const Int = u64;
+                        const size = @sizeOf(Int);
+                        buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                        latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
+
+                        // zig or LLVM doesn't do @ctz nicely with SIMD
+                        if (comptime ascii_vector_size >= 8) {
+                            {
+                                const bytes = @bitCast(Int, latin1[0..size].*);
+                                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                                const mask = bytes & 0x8080808080808080;
+
+                                if (mask > 0) {
+                                    const first_set_byte = @ctz(Int, mask) / 8;
+                                    if (comptime Environment.allow_assert) {
+                                        assert(latin1[first_set_byte] >= 127);
+                                        var j: usize = 0;
+                                        while (j < first_set_byte) : (j += 1) {
+                                            assert(latin1[j] < 127);
+                                        }
                                     }
+
+                                    buf[0..size].* = @bitCast([size]u8, bytes);
+                                    buf = buf[first_set_byte..];
+                                    latin1 = latin1[first_set_byte..];
+                                    break :inner;
                                 }
 
                                 buf[0..size].* = @bitCast([size]u8, bytes);
-                                buf = buf[first_set_byte..];
-                                latin1 = latin1[first_set_byte..];
-                                break :inner;
+                                latin1 = latin1[size..];
+                                buf = buf[size..];
                             }
 
-                            buf[0..size].* = @bitCast([size]u8, bytes);
-                            latin1 = latin1[size..];
-                            buf = buf[size..];
-                        }
-
-                        if (comptime ascii_vector_size >= 16) {
-                            const bytes = @bitCast(Int, latin1[0..size].*);
-                            // https://dotat.at/@/2022-06-27-tolower-swar.html
-                            const mask = bytes & 0x8080808080808080;
-
-                            if (mask > 0) {
-                                const first_set_byte = @ctz(Int, mask) / 8;
-                                if (comptime Environment.allow_assert) {
-                                    assert(latin1[first_set_byte] >= 127);
-                                    var j: usize = 0;
-                                    while (j < first_set_byte) : (j += 1) {
-                                        assert(latin1[j] < 127);
+                            if (comptime ascii_vector_size >= 16) {
+                                const bytes = @bitCast(Int, latin1[0..size].*);
+                                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                                const mask = bytes & 0x8080808080808080;
+
+                                if (mask > 0) {
+                                    const first_set_byte = @ctz(Int, mask) / 8;
+                                    if (comptime Environment.allow_assert) {
+                                        assert(latin1[first_set_byte] >= 127);
+                                        var j: usize = 0;
+                                        while (j < first_set_byte) : (j += 1) {
+                                            assert(latin1[j] < 127);
+                                        }
                                     }
-                                }
 
-                                buf[0..size].* = @bitCast([size]u8, bytes);
-                                buf = buf[first_set_byte..];
-                                latin1 = latin1[first_set_byte..];
-                                break :inner;
+                                    buf[0..size].* = @bitCast([size]u8, bytes);
+                                    buf = buf[first_set_byte..];
+                                    latin1 = latin1[first_set_byte..];
+                                    break :inner;
+                                }
                             }
                         }
-
                         unreachable;
                     }
-                }
 
-                buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
-                latin1 = latin1[ascii_vector_size..];
-                buf = buf[ascii_vector_size..];
+                    buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
+                    latin1.ptr += ascii_vector_size;
+                    buf.ptr += ascii_vector_size;
+                }
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
 
             {
                 const Int = u64;
                 const size = @sizeOf(Int);
-                while (latin1.len >= size) {
+                const latin1_end_ptr = latin1.ptr + latin1.len - (latin1.len % size);
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+                while (latin1.ptr != latin1_end_ptr) {
                     const bytes = @bitCast(Int, latin1[0..size].*);
                     // https://dotat.at/@/2022-06-27-tolower-swar.html
                     const mask = bytes & 0x8080808080808080;
@@ -1108,67 +1119,51 @@ pub fn allocateLatin1IntoUTF8WithList(list_: std.ArrayList(u8), offset_into_list
                         }
 
                         buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
+                        buf.ptr += first_set_byte;
+                        latin1.ptr += first_set_byte;
+                        buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                        latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
                         break :inner;
                     }
 
                     buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                    latin1.ptr += size;
+                    buf.ptr += size;
                 }
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
 
             {
-                const Int = u32;
-                const size = @sizeOf(Int);
-                while (latin1.len >= size) {
-                    const bytes = @bitCast(Int, latin1[0..size].*);
-                    // https://dotat.at/@/2022-06-27-tolower-swar.html
-                    const mask = bytes & 0x80808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(latin1[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(latin1[j] < 127);
-                            }
-                        }
-
-                        buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
-                        break :inner;
-                    }
-
-                    buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                assert(latin1.len < 8);
+                const end = latin1.ptr + latin1.len;
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+                while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+                    buf.ptr[0] = latin1.ptr[0];
+                    buf.ptr += 1;
+                    latin1.ptr += 1;
                 }
-            }
 
-            while (latin1.len >= 1 and latin1[0] < 127) {
-                buf[0] = latin1[0];
-                latin1 = latin1[1..];
-                buf = buf[1..];
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
         }
 
-        i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
-        list.items.len = i;
+        while (latin1.len > 0 and latin1[0] > 127) {
+            i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+            list.items.len = i;
 
-        while (latin1.len > 0 and latin1[0] >= 127) {
             try list.ensureUnusedCapacity(2 + latin1.len);
             buf = list.items.ptr[i..list.capacity];
             buf[0..2].* = latin1ToCodepointBytesAssumeNotASCII(latin1[0]);
             latin1 = latin1[1..];
             buf = buf[2..];
-
-            i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
-            list.items.len = i;
         }
+
+        i = @ptrToInt(buf.ptr) - @ptrToInt(list.items.ptr);
+        list.items.len = i;
     }
 
     return list;
@@ -1296,6 +1291,60 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
 
                 if (@reduce(.Max, vec) > 127) {
                     if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
+
+                    // zig or LLVM doesn't do @ctz nicely with SIMD
+                    if (comptime ascii_vector_size >= 8) {
+                        const Int = u64;
+                        const size = @sizeOf(Int);
+
+                        {
+                            const bytes = @bitCast(Int, latin1[0..size].*);
+                            // https://dotat.at/@/2022-06-27-tolower-swar.html
+                            const mask = bytes & 0x8080808080808080;
+
+                            if (mask > 0) {
+                                const first_set_byte = @ctz(Int, mask) / 8;
+                                if (comptime Environment.allow_assert) {
+                                    assert(latin1[first_set_byte] >= 127);
+                                    var j: usize = 0;
+                                    while (j < first_set_byte) : (j += 1) {
+                                        assert(latin1[j] < 127);
+                                    }
+                                }
+
+                                buf[0..size].* = @bitCast([size]u8, bytes);
+                                buf = buf[first_set_byte..];
+                                latin1 = latin1[first_set_byte..];
+                                break :inner;
+                            }
+
+                            buf[0..size].* = @bitCast([size]u8, bytes);
+                            latin1 = latin1[size..];
+                            buf = buf[size..];
+                        }
+
+                        if (comptime ascii_vector_size >= 16) {
+                            const bytes = @bitCast(Int, latin1[0..size].*);
+                            // https://dotat.at/@/2022-06-27-tolower-swar.html
+                            const mask = bytes & 0x8080808080808080;
+
+                            if (mask > 0) {
+                                const first_set_byte = @ctz(Int, mask) / 8;
+                                if (comptime Environment.allow_assert) {
+                                    assert(latin1[first_set_byte] >= 127);
+                                    var j: usize = 0;
+                                    while (j < first_set_byte) : (j += 1) {
+                                        assert(latin1[j] < 127);
+                                    }
+                                }
+
+                                buf[0..size].* = @bitCast([size]u8, bytes);
+                                buf = buf[first_set_byte..];
+                                latin1 = latin1[first_set_byte..];
+                                break :inner;
+                            }
+                        }
+                    }
                     break;
                 }
 
@@ -1338,40 +1387,19 @@ pub fn copyLatin1IntoUTF8StopOnNonASCII(buf_: []u8, comptime Type: type, latin1_
             }
 
             {
-                const Int = u32;
-                const size = @sizeOf(Int);
-                while (@minimum(buf.len, latin1.len) >= size) {
-                    const bytes = @bitCast(Int, latin1[0..size].*);
-                    const mask = bytes & 0x80808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime stop) return .{ .written = std.math.maxInt(u32), .read = std.math.maxInt(u32) };
-
-                        if (comptime Environment.allow_assert) {
-                            assert(latin1[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(latin1[j] < 127);
-                            }
-                        }
-
-                        buf[0..size].* = @bitCast([size]u8, bytes);
-                        buf = buf[first_set_byte..];
-                        latin1 = latin1[first_set_byte..];
-                        break :inner;
-                    }
-
-                    buf[0..size].* = @bitCast([size]u8, bytes);
-                    latin1 = latin1[size..];
-                    buf = buf[size..];
+                const end = latin1.ptr + latin1.len;
+                assert(@ptrToInt(latin1.ptr + 8) > @ptrToInt(end));
+                const start_ptr = @ptrToInt(buf.ptr);
+                const start_ptr_latin1 = @ptrToInt(latin1.ptr);
+
+                while (latin1.ptr != end and latin1.ptr[0] <= 127) {
+                    buf.ptr[0] = latin1.ptr[0];
+                    buf.ptr += 1;
+                    latin1.ptr += 1;
                 }
-            }
 
-            while (@minimum(buf.len, latin1.len) >= 1 and latin1[0] < 127) {
-                buf[0] = latin1[0];
-                latin1 = latin1[1..];
-                buf = buf[1..];
+                buf.len -= @ptrToInt(buf.ptr) - start_ptr;
+                latin1.len -= @ptrToInt(latin1.ptr) - start_ptr_latin1;
             }
         }
 
@@ -2479,106 +2507,111 @@ pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
     var remaining = slice;
 
     if (comptime Environment.isAarch64 or Environment.isX64) {
-        while (remaining.len >= ascii_vector_size) {
-            const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+        if (remaining.len >= ascii_vector_size) {
+            const remaining_start = remaining.ptr;
+            const remaining_end = remaining.ptr + remaining.len - (remaining.len % ascii_vector_size);
 
-            if (@reduce(.Max, vec) > 127) {
-                const Int = u64;
-                const size = @sizeOf(Int);
-                {
-                    const bytes = @bitCast(Int, remaining[0..size].*);
-                    // https://dotat.at/@/2022-06-27-tolower-swar.html
-                    const mask = bytes & 0x8080808080808080;
+            while (remaining.ptr != remaining_end) {
+                const vec: AsciiVector = remaining[0..ascii_vector_size].*;
 
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(remaining[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(remaining[j] < 127);
+                if (@reduce(.Max, vec) > 127) {
+                    const Int = u64;
+                    const size = @sizeOf(Int);
+                    remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+
+                    {
+                        const bytes = @bitCast(Int, remaining[0..size].*);
+                        // https://dotat.at/@/2022-06-27-tolower-swar.html
+                        const mask = bytes & 0x8080808080808080;
+
+                        if (mask > 0) {
+                            const first_set_byte = @ctz(Int, mask) / 8;
+                            if (comptime Environment.allow_assert) {
+                                assert(remaining[first_set_byte] > 127);
+                                var j: usize = 0;
+                                while (j < first_set_byte) : (j += 1) {
+                                    assert(remaining[j] <= 127);
+                                }
                             }
-                        }
 
-                        return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                            return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                        }
+                        remaining = remaining[size..];
                     }
-                }
-                {
-                    const bytes = @bitCast(Int, remaining[size..][0..size].*);
-                    const mask = bytes & 0x8080808080808080;
-
-                    if (mask > 0) {
-                        const first_set_byte = @ctz(Int, mask) / 8;
-                        if (comptime Environment.allow_assert) {
-                            assert(remaining[first_set_byte] >= 127);
-                            var j: usize = 0;
-                            while (j < first_set_byte) : (j += 1) {
-                                assert(remaining[j] < 127);
+                    {
+                        const bytes = @bitCast(Int, remaining[0..size].*);
+                        const mask = bytes & 0x8080808080808080;
+
+                        if (mask > 0) {
+                            const first_set_byte = @ctz(Int, mask) / 8;
+                            if (comptime Environment.allow_assert) {
+                                assert(remaining[first_set_byte] > 127);
+                                var j: usize = 0;
+                                while (j < first_set_byte) : (j += 1) {
+                                    assert(remaining[j] <= 127);
+                                }
                             }
-                        }
 
-                        return 8 + @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                            return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                        }
                     }
+                    unreachable;
                 }
-                break;
-            }
 
-            remaining = remaining[ascii_vector_size..];
+                // the more intuitive way, using slices, produces worse codegen
+                // specifically: it subtracts the length at the end of the loop
+                // we don't need to do that
+                // we only need to subtract the length once at the very end
+                remaining.ptr += ascii_vector_size;
+            }
+            remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
         }
     }
 
     {
         const Int = u64;
         const size = @sizeOf(Int);
-        while (remaining.len >= size) {
-            const bytes = @bitCast(Int, remaining[0..size].*);
-            // https://dotat.at/@/2022-06-27-tolower-swar.html
-            const mask = bytes & 0x8080808080808080;
-
-            if (mask > 0) {
-                const first_set_byte = @ctz(Int, mask) / 8;
-                if (comptime Environment.allow_assert) {
-                    assert(remaining[first_set_byte] >= 127);
-                    var j: usize = 0;
-                    while (j < first_set_byte) : (j += 1) {
-                        assert(remaining[j] < 127);
-                    }
-                }
-
-                return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
-            }
+        const remaining_start = remaining.ptr;
+        const remaining_end = remaining.ptr + remaining.len - (remaining.len % size);
 
-            remaining = remaining[size..];
+        if (comptime Environment.isAarch64 or Environment.isX64) {
+            // these assertions exist more so for LLVM
+            assert(remaining.len < ascii_vector_size);
+            assert(@ptrToInt(remaining.ptr + ascii_vector_size) > @ptrToInt(remaining_end));
         }
-    }
 
-    {
-        const Int = u32;
-        const size = @sizeOf(Int);
-        while (remaining.len >= size) {
-            const bytes = @bitCast(Int, remaining[0..size].*);
-            const mask = bytes & 0x80808080;
-
-            if (mask > 0) {
-                const first_set_byte = @ctz(Int, mask) / 8;
-                if (comptime Environment.allow_assert) {
-                    assert(remaining[first_set_byte] >= 127);
-                    var j: usize = 0;
-                    while (j < first_set_byte) : (j += 1) {
-                        assert(remaining[j] < 127);
+        if (remaining.len >= size) {
+            while (remaining.ptr != remaining_end) {
+                const bytes = @bitCast(Int, remaining[0..size].*);
+                // https://dotat.at/@/2022-06-27-tolower-swar.html
+                const mask = bytes & 0x8080808080808080;
+
+                if (mask > 0) {
+                    remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
+                    const first_set_byte = @ctz(Int, mask) / 8;
+                    if (comptime Environment.allow_assert) {
+                        assert(remaining[first_set_byte] > 127);
+                        var j: usize = 0;
+                        while (j < first_set_byte) : (j += 1) {
+                            assert(remaining[j] <= 127);
+                        }
                     }
+
+                    return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
                 }
 
-                return @as(u32, first_set_byte) + @intCast(u32, slice.len - remaining.len);
+                remaining.ptr += size;
             }
-
-            remaining = remaining[size..];
+            remaining.len -= @ptrToInt(remaining.ptr) - @ptrToInt(remaining_start);
         }
     }
 
-    for (remaining) |char, i| {
-        if (char > 127) {
-            return @truncate(u32, i + (slice.len - remaining.len));
+    assert(remaining.len < 8);
+
+    for (remaining) |*char| {
+        if (char.* > 127) {
+            // try to prevent it from reading the length of the slice
+            return @truncate(u32, @ptrToInt(char) - @ptrToInt(slice.ptr));
         }
     }
 
@@ -2932,52 +2965,61 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
     var remaining = slice;
 
     if (comptime Environment.isAarch64 or Environment.isX64) {
-        while (remaining.len >= ascii_u16_vector_size) {
-            const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
-            const max_value = @reduce(.Max, vec);
-
-            if (comptime check_min) {
-                // by using @reduce here, we make it only do one comparison
-                // @reduce doesn't tell us the index though
-                const min_value = @reduce(.Min, vec);
-                if (min_value < 0x20 or max_value > 127) {
-                    // this is really slow
-                    // it does it element-wise for every single u8 on the vector
-                    // instead of doing the SIMD instructions
-                    // it removes a loop, but probably is slower in the end
-                    const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
-                        @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
-                    const bitmask = @ptrCast(*const u16, &cmp).*;
-                    const first = @ctz(u16, bitmask);
-
-                    return @intCast(u32, @as(u32, first) +
-                        @intCast(u32, slice.len - remaining.len));
-                }
-            } else if (comptime !check_min) {
-                if (max_value > 127) {
-                    const cmp = vec > max_u16_ascii;
-                    const bitmask = @ptrCast(*const u16, &cmp).*;
-                    const first = @ctz(u16, bitmask);
-
-                    return @intCast(u32, @as(u32, first) +
-                        @intCast(u32, slice.len - remaining.len));
+        const end_ptr = remaining.ptr + remaining.len - (remaining.len % ascii_u16_vector_size);
+        if (remaining.len > ascii_u16_vector_size) {
+            const remaining_start = remaining.ptr;
+            while (remaining.ptr != end_ptr) {
+                const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+                const max_value = @reduce(.Max, vec);
+
+                if (comptime check_min) {
+                    // by using @reduce here, we make it only do one comparison
+                    // @reduce doesn't tell us the index though
+                    const min_value = @reduce(.Min, vec);
+                    if (min_value < 0x20 or max_value > 127) {
+                        remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+                        // this is really slow
+                        // it does it element-wise for every single u8 on the vector
+                        // instead of doing the SIMD instructions
+                        // it removes a loop, but probably is slower in the end
+                        const cmp = @bitCast(AsciiVectorU16U1, vec > max_u16_ascii) |
+                            @bitCast(AsciiVectorU16U1, vec < min_u16_ascii);
+                        const bitmask: u16 = @ptrCast(*const u16, &cmp).*;
+                        const first = @ctz(u16, bitmask);
+
+                        return @intCast(u32, @as(u32, first) +
+                            @intCast(u32, slice.len - remaining.len));
+                    }
+                } else if (comptime !check_min) {
+                    if (max_value > 127) {
+                        remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
+
+                        const cmp = vec > max_u16_ascii;
+                        const bitmask = @ptrCast(*const u16, &cmp).*;
+                        const first = @ctz(u16, bitmask);
+
+                        return @intCast(u32, @as(u32, first) +
+                            @intCast(u32, slice.len - remaining.len));
+                    }
                 }
-            }
 
-            remaining = remaining[ascii_u16_vector_size..];
+                remaining.ptr += ascii_u16_vector_size;
+            }
+            remaining.len -= (@ptrToInt(remaining.ptr) - @ptrToInt(remaining_start)) / 2;
         }
     }
 
     if (comptime check_min) {
-        for (remaining) |char, i| {
+        for (remaining) |char| {
             if (char > 127 or char < 0x20) {
-                return @truncate(u32, i + (slice.len - remaining.len));
+                return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2);
             }
         }
     } else {
-        for (remaining) |char, i| {
+        for (remaining) |char| {
             if (char > 127) {
-                return @truncate(u32, i + (slice.len - remaining.len));
+                return @truncate(u32, (@ptrToInt(std.mem.sliceAsBytes(remaining).ptr) - @ptrToInt(std.mem.sliceAsBytes(slice).ptr)) / 2);
             }
         }
     }