Document failed experiment

author: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-30 19:23:36 -0700
committer: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-30 19:23:36 -0700
commit: 39d111fa63c73f8e023ca5f8ad4748db63e1caf9 (patch)
tree: 2d76eb7fd4c3ae7ab12107b93dc8c2dd99143b99
parent: 4821b9c10bdec3660c20db01713a21c0e13172ea (diff)
download: bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.gz
bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.zst
bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.zip
1 files changed, 251 insertions, 14 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index f33216fb8..08745e2fe 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -1,6 +1,233 @@
 const std = @import("std");
 const Environment = @import("./env.zig");
 const strings = @import("./string_immutable.zig");
+const bun = @import("./global.zig");
+
+// -- Failed Experiment --
+// Delete this code later
+// -- Failed Experiment --
+// Writing tons of < 8 byte chunks is kind of expensive
+// because we have to loop through them once to copy and then again to encode.
+// It would be faster if we could use SIMD,
+// but the behavior is out of our control.
+// So instead, we copy the _unencoded_ bytes to a buffer.
+// Then, just before we send it over the network, we encode it, usually in-place.
+// The caveat: if the encoding changes between writes, the pending bytes must be flushed (encoded) first.
+// pub const Delayer = struct {
+//     last_write: u32 = 0,
+//     last_encoding: Encoding = Encoding.bytes,
+
+//     const log = bun.Output.scoped(.Delayer, true);
+//     pub const Encoding = enum {
+//         bytes,
+//         utf16,
+//         latin1,
+//     };
+
+//     fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+//         var list = list_;
+//         var remain = list.slice()[this.last_write..];
+//         const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain);
+//         log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
+//         // common case: nothing to do, it's just ascii
+//         if (element_count == remain.len) {
+//             this.last_write += @truncate(u32, remain.len);
+
+//             return list;
+//         }
+
+//         std.debug.assert(element_count > remain.len);
+//         var arraylist = list.listManaged(allocator);
+//         // make sure we have enough room
+//         try arraylist.ensureUnusedCapacity(element_count - remain.len);
+
+//         list.update(arraylist);
+//         var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len];
+//         var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + (element_count - remain.len)];
+//         std.debug.assert(write_remain.len > 0);
+//         std.debug.assert(read_remain.len > 0);
+//         std.debug.assert(write_remain.len > read_remain.len);
+//         this.last_write += @truncate(u32, write_remain.len);
+//         list.len += @intCast(u32, element_count - remain.len);
+
+//         // faster path: stack allocated buffer
+//         if (write_remain.len <= 4096) {
+//             var buf: [4096]u8 = undefined;
+//             const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain);
+//             std.debug.assert(@as(usize, result.written) == write_remain.len);
+//             std.debug.assert(@as(usize, result.read) == read_remain.len);
+//             @memcpy(write_remain.ptr, &buf, write_remain.len);
+//         } else {
+//             // slow path
+//             var temp_buf = try allocator.dupe(u8, read_remain);
+//             defer allocator.free(temp_buf);
+//             const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf);
+
+//             std.debug.assert(@as(usize, result.written) == write_remain.len);
+//             std.debug.assert(@as(usize, result.read) == read_remain.len);
+//         }
+
+//         return list;
+//     }
+//     fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+//         var list = list_;
+//         var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]);
+//         const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2;
+//         log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
+
+//         var arraylist = list.listManaged(allocator);
+
+//         // make sure we have enough room
+//         const grow = element_count - list.slice()[this.last_write..].len;
+//         try arraylist.ensureUnusedCapacity(grow);
+//         list.update(arraylist);
+//         var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + grow];
+//         this.last_write += @truncate(u32, grow);
+//         list.len += @intCast(u32, grow);
+
+//         var buf: [4096]u8 = undefined;
+
+//         if (element_count < buf.len) {
+//             const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain);
+//             std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
+//             std.debug.assert(@as(usize, result.read) == remain.len);
+//             @memcpy(write_remain.ptr, &buf, write_remain.len);
+//         } else {
+//             // slow path
+//             var temp_buf = try allocator.alloc(u16, remain.len);
+//             @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len);
+//             defer allocator.free(temp_buf);
+//             const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf);
+//             std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
+//             std.debug.assert(@as(usize, result.read) == remain.len);
+//         }
+
+//         return list;
+//     }
+//     pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) {
+//         var list = list_;
+//         log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len });
+
+//         {
+//             switch (this.last_encoding) {
+//                 .latin1 => {
+//                     list = try this.flushLatin1(list, allocator);
+//                     this.last_write = list.len;
+//                     var arraylist = list.listManaged(allocator);
+//                     var bytes = std.mem.sliceAsBytes(str);
+//                     try arraylist.ensureUnusedCapacity(bytes.len);
+//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+//                     this.last_encoding = .utf16;
+//                     list.update(arraylist);
+//                     list.len += @intCast(u32, bytes.len);
+//                     return list;
+//                 },
+//                 .bytes, .utf16 => |enc| {
+//                     if (enc == .bytes) {
+//                         this.last_write = list.len;
+//                         this.last_encoding = .utf16;
+//                     }
+//                     var arraylist = list.listManaged(allocator);
+//                     var bytes = std.mem.sliceAsBytes(str);
+//                     try arraylist.ensureUnusedCapacity(bytes.len);
+//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+//                     list.update(arraylist);
+//                     list.len += @intCast(u32, bytes.len);
+//                 },
+//             }
+//         }
+
+//         return list;
+//     }
+
+//     pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+//         if (this.last_encoding == .bytes) {
+//             std.debug.assert(this.last_write == list_.len);
+//             return list_;
+//         }
+
+//         var list = list_;
+//         switch (this.last_encoding) {
+//             .utf16 => {
+//                 list = try this.flushUTF16(list_, allocator);
+//                 this.last_write = list.len;
+//                 this.last_encoding = .bytes;
+//             },
+//             .latin1 => {
+//                 list = try this.flushLatin1(list_, allocator);
+//                 this.last_write = list.len;
+//                 this.last_encoding = .bytes;
+//             },
+//             .bytes => unreachable,
+//         }
+
+//         return list;
+//     }
+
+//     pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
+//         var list = list_;
+//         log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str });
+
+//         {
+//             switch (this.last_encoding) {
+//                 .utf16 => {
+//                     list = try this.flushUTF16(list, allocator);
+//                     this.last_write = list.len;
+//                     var arraylist = list.listManaged(allocator);
+//                     var bytes = std.mem.sliceAsBytes(str);
+//                     try arraylist.ensureUnusedCapacity(bytes.len);
+//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+//                     this.last_encoding = .latin1;
+//                     list.update(arraylist);
+//                     list.len += @intCast(u32, bytes.len);
+//                     return list;
+//                 },
+//                 .bytes, .latin1 => |enc| {
+//                     if (enc == .bytes) {
+//                         this.last_write = list.len;
+//                         this.last_encoding = .latin1;
+//                     }
+//                     var arraylist = list.listManaged(allocator);
+//                     var bytes = std.mem.sliceAsBytes(str);
+//                     try arraylist.ensureUnusedCapacity(bytes.len);
+//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+//                     list.update(arraylist);
+//                     list.len += @intCast(u32, bytes.len);
+//                 },
+//             }
+//         }
+
+//         return list;
+//     }
+
+//     pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
+//         var list = list_;
+//         log("writeBytes({any}, {any})", .{ .delayer = this, .str = str });
+
+//         {
+//             switch (this.last_encoding) {
+//                 .utf16 => {
+//                     list = try this.flushUTF16(list, allocator);
+//                 },
+//                 .latin1 => {
+//                     list = try this.flushLatin1(list, allocator);
+//                 },
+//                 else => {},
+//             }
+
+//             var arraylist = list.listManaged(allocator);
+//             var bytes = std.mem.sliceAsBytes(str);
+//             try arraylist.ensureUnusedCapacity(bytes.len);
+//             @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+//             list.update(arraylist);
+//             list.len += @intCast(u32, bytes.len);
+//             this.last_write = list.len;
+//             this.last_encoding = .bytes;
+//         }
+
+//         return list;
+//     }
+// };
 
 /// This is like ArrayList except it stores the length and capacity as u32
 /// In practice, it is very unusual to have lengths above 4 GB
@@ -125,24 +352,34 @@ pub fn BabyList(comptime Type: type) type {
                 @compileError("Unsupported for type " ++ @typeName(Type));
 
             var list_ = this.listManaged(allocator);
-            defer this.update(list_);
-            try list_.ensureTotalCapacityPrecise(list_.items.len + str.len);
             const initial = this.len;
-            var remain = str;
-            while (remain.len > 0) {
-                const orig_len = list_.items.len;
-
-                var slice_ = list_.items.ptr[orig_len..list_.capacity];
-                const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain);
-                remain = remain[result.read..];
-                list_.items.len += @as(usize, result.written);
-                if (remain.len > 0) {
-                    try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain));
-                    continue;
+            {
+                defer this.update(list_);
+                try list_.ensureTotalCapacityPrecise(list_.items.len + str.len + 4);
+
+                var remain = str;
+                while (remain.len > 0) {
+                    const orig_len = list_.items.len;
+
+                    var slice_ = list_.items.ptr[orig_len..list_.capacity];
+                    const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain);
+                    remain = remain[result.read..];
+                    list_.items.len += @as(usize, result.written);
+                    if (remain.len > 0) {
+                        try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain));
+                        continue;
+                    }
+                    if (result.read == 0 or result.written == 0) break;
                 }
-                if (result.read == 0 or result.written == 0) break;
             }
 
+            if (comptime Environment.allow_assert) {
+                // sanity check that encoding produced a consistent result
+                var allocated = try strings.toUTF8Alloc(allocator, str);
+                defer allocator.free(allocated);
+                const encoded = this.ptr[initial..this.len];
+                std.testing.expectEqualStrings(allocated, encoded) catch unreachable;
+            }
             return this.len - initial;
         }
     };
author	Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>	2022-06-30 19:23:36 -0700
committer	Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>	2022-06-30 19:23:36 -0700
commit	39d111fa63c73f8e023ca5f8ad4748db63e1caf9 (patch)
tree	2d76eb7fd4c3ae7ab12107b93dc8c2dd99143b99
parent	4821b9c10bdec3660c20db01713a21c0e13172ea (diff)
download	bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.gz bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.zst bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.zip