aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-30 19:23:36 -0700
committerGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-30 19:23:36 -0700
commit39d111fa63c73f8e023ca5f8ad4748db63e1caf9 (patch)
tree2d76eb7fd4c3ae7ab12107b93dc8c2dd99143b99
parent4821b9c10bdec3660c20db01713a21c0e13172ea (diff)
downloadbun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.gz
bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.tar.zst
bun-39d111fa63c73f8e023ca5f8ad4748db63e1caf9.zip
Document failed experiment
-rw-r--r--src/baby_list.zig265
1 files changed, 251 insertions, 14 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index f33216fb8..08745e2fe 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -1,6 +1,233 @@
const std = @import("std");
const Environment = @import("./env.zig");
const strings = @import("./string_immutable.zig");
+const bun = @import("./global.zig");
+
+// -- Failed Experiment --
+// Delete this code later
+// -- Failed Experiment --
+// Writing tons of < 8 byte chunks is kind of expensive
+// because we have to loop through them to copy and then again to encode
+// It would be faster if we could use SIMD
+// but the behavior is out of our control
+// so instead, we copy the _unencoded_ bytes to a buffer
+// Then, just before we send it over the network, we encode it, usually in-place
+// The caveat is if the encoding changes
+// pub const Delayer = struct {
+// last_write: u32 = 0,
+// last_encoding: Encoding = Encoding.bytes,
+
+// const log = bun.Output.scoped(.Delayer, true);
+// pub const Encoding = enum {
+// bytes,
+// utf16,
+// latin1,
+// };
+
+// fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+// var list = list_;
+// var remain = list.slice()[this.last_write..];
+// const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain);
+// log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
+// // common case: nothing to do, it's just ascii
+// if (element_count == remain.len) {
+// this.last_write += @truncate(u32, remain.len);
+
+// return list;
+// }
+
+// std.debug.assert(element_count > remain.len);
+// var arraylist = list.listManaged(allocator);
+// // assert we have enough room
+// try arraylist.ensureUnusedCapacity(element_count - remain.len);
+
+// list.update(arraylist);
+// var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len];
+// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + (element_count - remain.len)];
+// std.debug.assert(write_remain.len > 0);
+// std.debug.assert(read_remain.len > 0);
+// std.debug.assert(write_remain.len > read_remain.len);
+// this.last_write += @truncate(u32, write_remain.len);
+// list.len += @intCast(u32, element_count - remain.len);
+
+// // faster path: stack allocated buffer
+// if (write_remain.len <= 4096) {
+// var buf: [4096]u8 = undefined;
+// const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain);
+// std.debug.assert(@as(usize, result.written) == write_remain.len);
+// std.debug.assert(@as(usize, result.read) == read_remain.len);
+// @memcpy(write_remain.ptr, &buf, write_remain.len);
+// } else {
+// // slow path
+// var temp_buf = try allocator.dupe(u8, read_remain);
+// defer allocator.free(temp_buf);
+// const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf);
+
+// std.debug.assert(@as(usize, result.written) == write_remain.len);
+// std.debug.assert(@as(usize, result.read) == read_remain.len);
+// }
+
+// return list;
+// }
+// fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+// var list = list_;
+// var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]);
+// const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2;
+// log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
+
+// var arraylist = list.listManaged(allocator);
+
+// // assert we have enough room
+// const grow = element_count - list.slice()[this.last_write..].len;
+// try arraylist.ensureUnusedCapacity(grow);
+// list.update(arraylist);
+// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + grow];
+// this.last_write += @truncate(u32, grow);
+// list.len += @intCast(u32, grow);
+
+// var buf: [4096]u8 = undefined;
+
+// if (element_count < buf.len) {
+// const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain);
+// std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
+// std.debug.assert(@as(usize, result.read) == remain.len);
+// @memcpy(write_remain.ptr, &buf, write_remain.len);
+// } else {
+// // slow path
+// var temp_buf = try allocator.alloc(u16, remain.len);
+// @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len);
+// defer allocator.free(temp_buf);
+// const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf);
+// std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
+// std.debug.assert(@as(usize, result.read) == remain.len);
+// }
+
+// return list;
+// }
+// pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) {
+// var list = list_;
+// log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len });
+
+// {
+// switch (this.last_encoding) {
+// .latin1 => {
+// list = try this.flushLatin1(list, allocator);
+// this.last_write = list.len;
+// var arraylist = list.listManaged(allocator);
+// var bytes = std.mem.sliceAsBytes(str);
+// try arraylist.ensureUnusedCapacity(bytes.len);
+// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+// this.last_encoding = .utf16;
+// list.update(arraylist);
+// list.len += @intCast(u32, bytes.len);
+// return list;
+// },
+// .bytes, .utf16 => |enc| {
+// if (enc == .bytes) {
+// this.last_write = list.len;
+// this.last_encoding = .utf16;
+// }
+// var arraylist = list.listManaged(allocator);
+// var bytes = std.mem.sliceAsBytes(str);
+// try arraylist.ensureUnusedCapacity(bytes.len);
+// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+// list.update(arraylist);
+// list.len += @intCast(u32, bytes.len);
+// },
+// }
+// }
+
+// return list;
+// }
+
+// pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
+// if (this.last_encoding == .bytes) {
+// std.debug.assert(this.last_write == list_.len);
+// return list_;
+// }
+
+// var list = list_;
+// switch (this.last_encoding) {
+// .utf16 => {
+// list = try this.flushUTF16(list_, allocator);
+// this.last_write = list.len;
+// this.last_encoding = .bytes;
+// },
+// .latin1 => {
+// list = try this.flushLatin1(list_, allocator);
+// this.last_write = list.len;
+// this.last_encoding = .bytes;
+// },
+// .bytes => unreachable,
+// }
+
+// return list;
+// }
+
+// pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
+// var list = list_;
+// log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str });
+
+// {
+// switch (this.last_encoding) {
+// .utf16 => {
+// list = try this.flushUTF16(list, allocator);
+// this.last_write = list.len;
+// var arraylist = list.listManaged(allocator);
+// var bytes = std.mem.sliceAsBytes(str);
+// try arraylist.ensureUnusedCapacity(bytes.len);
+// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+// this.last_encoding = .latin1;
+// list.update(arraylist);
+// list.len += @intCast(u32, bytes.len);
+// return list;
+// },
+// .bytes, .latin1 => |enc| {
+// if (enc == .bytes) {
+// this.last_write = list.len;
+// this.last_encoding = .latin1;
+// }
+// var arraylist = list.listManaged(allocator);
+// var bytes = std.mem.sliceAsBytes(str);
+// try arraylist.ensureUnusedCapacity(bytes.len);
+// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+// list.update(arraylist);
+// list.len += @intCast(u32, bytes.len);
+// },
+// }
+// }
+
+// return list;
+// }
+
+// pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
+// var list = list_;
+// log("writeBytes({any}, {any})", .{ .delayer = this, .str = str });
+
+// {
+// switch (this.last_encoding) {
+// .utf16 => {
+// list = try this.flushUTF16(list, allocator);
+// },
+// .latin1 => {
+// list = try this.flushLatin1(list, allocator);
+// },
+// else => {},
+// }
+
+// var arraylist = list.listManaged(allocator);
+// var bytes = std.mem.sliceAsBytes(str);
+// try arraylist.ensureUnusedCapacity(bytes.len);
+// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
+// list.update(arraylist);
+// list.len += @intCast(u32, bytes.len);
+// this.last_write = list.len;
+// this.last_encoding = .bytes;
+// }
+
+// return list;
+// }
+// };
/// This is like ArrayList except it stores the length and capacity as u32
/// In practice, it is very unusual to have lengths above 4 GB
@@ -125,24 +352,34 @@ pub fn BabyList(comptime Type: type) type {
@compileError("Unsupported for type " ++ @typeName(Type));
var list_ = this.listManaged(allocator);
- defer this.update(list_);
- try list_.ensureTotalCapacityPrecise(list_.items.len + str.len);
const initial = this.len;
- var remain = str;
- while (remain.len > 0) {
- const orig_len = list_.items.len;
-
- var slice_ = list_.items.ptr[orig_len..list_.capacity];
- const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain);
- remain = remain[result.read..];
- list_.items.len += @as(usize, result.written);
- if (remain.len > 0) {
- try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain));
- continue;
+ {
+ defer this.update(list_);
+ try list_.ensureTotalCapacityPrecise(list_.items.len + str.len + 4);
+
+ var remain = str;
+ while (remain.len > 0) {
+ const orig_len = list_.items.len;
+
+ var slice_ = list_.items.ptr[orig_len..list_.capacity];
+ const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain);
+ remain = remain[result.read..];
+ list_.items.len += @as(usize, result.written);
+ if (remain.len > 0) {
+ try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, remain));
+ continue;
+ }
+ if (result.read == 0 or result.written == 0) break;
}
- if (result.read == 0 or result.written == 0) break;
}
+ if (comptime Environment.allow_assert) {
+ // sanity check that encoding produced a consistent result
+ var allocated = try strings.toUTF8Alloc(allocator, str);
+ defer allocator.free(allocated);
+ const encoded = this.ptr[initial..this.len];
+ std.testing.expectEqualStrings(allocated, encoded) catch unreachable;
+ }
return this.len - initial;
}
};