diff options
author | 2022-06-30 19:23:36 -0700 | |
---|---|---|
committer | 2022-06-30 19:23:36 -0700 | |
commit | 39d111fa63c73f8e023ca5f8ad4748db63e1caf9 (patch) | |
tree | 2d76eb7fd4c3ae7ab12107b93dc8c2dd99143b99 | |
parent | 4821b9c10bdec3660c20db01713a21c0e13172ea (diff) | |
download | bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.gz bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.zst bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.zip |
Document failed experiment
-rw-r--r-- | src/baby_list.zig | 265 |
1 files changed, 251 insertions, 14 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig index f33216fb8..08745e2fe 100644 --- a/src/baby_list.zig +++ b/src/baby_list.zig @@ -1,6 +1,233 @@ const std = @import("std"); const Environment = @import("./env.zig"); const strings = @import("./string_immutable.zig"); +const bun = @import("./global.zig"); + +// -- Failed Experiment -- +// Delete this code later +// -- Failed Experiment -- +// Writing tons of < 8 byte chunks is kind of expensive +// because we have to loop through them to copy and then again to encode +// It would be faster if we could use SIMD +// but the behavior is out of our control +// so instead, we copy the _unencoded_ bytes to a buffer +// Then, just before we send it over the network, we encode it, usually in-place +// The caveat is if the encoding changes +// pub const Delayer = struct { +// last_write: u32 = 0, +// last_encoding: Encoding = Encoding.bytes, + +// const log = bun.Output.scoped(.Delayer, true); +// pub const Encoding = enum { +// bytes, +// utf16, +// latin1, +// }; + +// fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { +// var list = list_; +// var remain = list.slice()[this.last_write..]; +// const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain); +// log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len }); +// // common case: nothing to do, it's just ascii +// if (element_count == remain.len) { +// this.last_write += @truncate(u32, remain.len); + +// return list; +// } + +// std.debug.assert(element_count > remain.len); +// var arraylist = list.listManaged(allocator); +// // assert we have enough room +// try arraylist.ensureUnusedCapacity(element_count - remain.len); + +// list.update(arraylist); +// var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len]; +// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + (element_count - remain.len)]; +// std.debug.assert(write_remain.len > 0); +// std.debug.assert(read_remain.len > 0); +// std.debug.assert(write_remain.len > read_remain.len); +// this.last_write += @truncate(u32, write_remain.len); +// list.len += @intCast(u32, element_count - remain.len); + +// // faster path: stack allocated buffer +// if (write_remain.len <= 4096) { +// var buf: [4096]u8 = undefined; +// const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain); +// std.debug.assert(@as(usize, result.written) == write_remain.len); +// std.debug.assert(@as(usize, result.read) == read_remain.len); +// @memcpy(write_remain.ptr, &buf, write_remain.len); +// } else { +// // slow path +// var temp_buf = try allocator.dupe(u8, read_remain); +// defer allocator.free(temp_buf); +// const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf); + +// std.debug.assert(@as(usize, result.written) == write_remain.len); +// std.debug.assert(@as(usize, result.read) == read_remain.len); +// } + +// return list; +// } +// fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { +// var list = list_; +// var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]); +// const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2; +// log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len }); + +// var arraylist = list.listManaged(allocator); + +// // assert we have enough room +// const grow = element_count - list.slice()[this.last_write..].len; +// try arraylist.ensureUnusedCapacity(grow); +// list.update(arraylist); +// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + grow]; +// this.last_write += @truncate(u32, grow); +// list.len += @intCast(u32, grow); + +// var buf: [4096]u8 = undefined; + +// if (element_count < buf.len) { +// const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain); +// std.debug.assert(@as(usize, result.written * 2) == write_remain.len); +// std.debug.assert(@as(usize, result.read) == remain.len); +// @memcpy(write_remain.ptr, &buf, write_remain.len); +// } else { +// // slow path +// var temp_buf = try allocator.alloc(u16, remain.len); +// @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len); +// defer allocator.free(temp_buf); +// const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf); +// std.debug.assert(@as(usize, result.written * 2) == write_remain.len); +// std.debug.assert(@as(usize, result.read) == remain.len); +// } + +// return list; +// } +// pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) { +// var list = list_; +// log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len }); + +// { +// switch (this.last_encoding) { +// .latin1 => { +// list = try this.flushLatin1(list, allocator); +// this.last_write = list.len; +// var arraylist = list.listManaged(allocator); +// var bytes = std.mem.sliceAsBytes(str); +// try arraylist.ensureUnusedCapacity(bytes.len); +// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); +// this.last_encoding = .utf16; +// list.update(arraylist); +// list.len += @intCast(u32, bytes.len); +// return list; +// }, +// .bytes, .utf16 => |enc| { +// if (enc == .bytes) { +// this.last_write = list.len; +// this.last_encoding = .utf16; +// } +// var arraylist = list.listManaged(allocator); +// var bytes = std.mem.sliceAsBytes(str); +// try arraylist.ensureUnusedCapacity(bytes.len); +// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); +// list.update(arraylist); +// list.len += @intCast(u32, bytes.len); +// }, +// } +// } + +// return list; +// } + +// pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { +// if (this.last_encoding == .bytes) { +// std.debug.assert(this.last_write == list_.len); +// return list_; +// } + +// var list = list_; +// switch (this.last_encoding) { +// .utf16 => { +// list = try this.flushUTF16(list_, allocator); +// this.last_write = list.len; +// this.last_encoding = .bytes; +// }, +// .latin1 => { +// list = try this.flushLatin1(list_, allocator); +// this.last_write = list.len; +// this.last_encoding = .bytes; +// }, +// .bytes => unreachable, +// } + +// return list; +// } + +// pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) { +// var list = list_; +// log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str }); + +// { +// switch (this.last_encoding) { +// .utf16 => { +// list = try this.flushUTF16(list, allocator); +// this.last_write = list.len; +// var arraylist = list.listManaged(allocator); +// var bytes = std.mem.sliceAsBytes(str); +// try arraylist.ensureUnusedCapacity(bytes.len); +// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); +// this.last_encoding = .latin1; +// list.update(arraylist); +// list.len += @intCast(u32, bytes.len); +// return list; +// }, +// .bytes, .latin1 => |enc| { +// if (enc == .bytes) { +// this.last_write = list.len; +// this.last_encoding = .latin1; +// } +// var arraylist = list.listManaged(allocator); +// var bytes = std.mem.sliceAsBytes(str); +// try arraylist.ensureUnusedCapacity(bytes.len); +// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); +// list.update(arraylist); +// list.len += @intCast(u32, bytes.len); +// }, +// } +// } + +// return list; +// } + +// pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) { +// var list = list_; +// log("writeBytes({any}, {any})", .{ .delayer = this, .str = str }); + +// { +// switch (this.last_encoding) { +// .utf16 => { +// list = try this.flushUTF16(list, allocator); +// }, +// .latin1 => { +// list = try this.flushLatin1(list, allocator); +// }, +// else => {}, +// } + +// var arraylist = list.listManaged(allocator); +// var bytes = std.mem.sliceAsBytes(str); +// try arraylist.ensureUnusedCapacity(bytes.len); +// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); +// list.update(arraylist); +// list.len += @intCast(u32, bytes.len); +// this.last_write = list.len; +// this.last_encoding = .bytes; +// } + +// return list; +// } +// }; /// This is like ArrayList except it stores the length and capacity as u32 /// In practice, it is very unusual to have lengths above 4 GB @@ -125,24 +352,34 @@ pub fn BabyList(comptime Type: type) type { @compileError("Unsupported for type " ++ @typeName(Type)); var list_ = this.listManaged(allocator); - defer this.update(list_); - try list_.ensureTotalCapacityPrecise(list_.items.len + str.len); const initial = this.len; - var remain = str; - while (remain.len > 0) { - const orig_len = list_.items.len; - - var slice_ = list_.items.ptr[orig_len..list_.capacity]; - const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain); - remain = remain[result.read..]; - list_.items.len += @as(usize, result.written); - if (remain.len > 0) { - try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain)); - continue; + { + defer this.update(list_); + try list_.ensureTotalCapacityPrecise(list_.items.len + str.len + 4); + + var remain = str; + while (remain.len > 0) { + const orig_len = list_.items.len; + + var slice_ = list_.items.ptr[orig_len..list_.capacity]; + const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain); + remain = remain[result.read..]; + list_.items.len += @as(usize, result.written); + if (remain.len > 0) { + try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain)); + continue; + } + if (result.read == 0 or result.written == 0) break; } - if (result.read == 0 or result.written == 0) break; } + if (comptime Environment.allow_assert) { + // sanity check that encoding produced a consistent result + var allocated = try strings.toUTF8Alloc(allocator, str); + defer allocator.free(allocated); + const encoded = this.ptr[initial..this.len]; + std.testing.expectEqualStrings(allocated, encoded) catch unreachable; + } return this.len - initial; } }; |