diff options
| author | 2022-12-12 13:41:55 -0800 | |
|---|---|---|
| committer | 2022-12-12 13:41:55 -0800 | |
| commit | bbc2dacd840709f2fdf932ae27c978078f348ac6 (patch) | |
| tree | 781017e67fba24dff67ce1a0109d85b49b55be6b /src | |
| parent | 0bb9493e4700b630e19abf48bd2b6bc7db417d9c (diff) | |
| download | bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.gz bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.zst bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.zip | |
Add markBinding call for UTF-8 length from UTF-16LE
Diffstat (limited to 'src')
| -rw-r--r-- | src/baby_list.zig | 226 | ||||
| -rw-r--r-- | src/bun.js/bindings/bun-simdutf.zig | 3 |
2 files changed, 3 insertions, 226 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig index 9876f5083..9f1580294 100644 --- a/src/baby_list.zig +++ b/src/baby_list.zig @@ -3,232 +3,6 @@ const Environment = @import("./env.zig"); const strings = @import("./string_immutable.zig"); const bun = @import("bun"); -// -- Failed Experiment -- -// Delete this code later -// -- Failed Experiment -- -// Writing tons of < 8 byte chunks is kind of expensive -// because we have to loop through them to copy and then again to encode -// It would be faster if we could use SIMD -// but the behavior is out of our control -// so instead, we copy the _unencoded_ bytes to a buffer -// Then, just before we send it over the network, we encode it, usually in-place -// The caveat is if the encoding changes -// pub const Delayer = struct { -// last_write: u32 = 0, -// last_encoding: Encoding = Encoding.bytes, - -// const log = bun.Output.scoped(.Delayer, true); -// pub const Encoding = enum { -// bytes, -// utf16, -// latin1, -// }; - -// fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { -// var list = list_; -// var remain = list.slice()[this.last_write..]; -// const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain); -// log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len }); -// // common case: nothing to do, it's just ascii -// if (element_count == remain.len) { -// this.last_write += @truncate(u32, remain.len); - -// return list; -// } - -// std.debug.assert(element_count > remain.len); -// var arraylist = list.listManaged(allocator); -// // assert we have enough room -// try arraylist.ensureUnusedCapacity(element_count - remain.len); - -// list.update(arraylist); -// var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len]; -// var write_remain = arraylist.items.ptr[this.last_write .. 
arraylist.items.len + (element_count - remain.len)]; -// std.debug.assert(write_remain.len > 0); -// std.debug.assert(read_remain.len > 0); -// std.debug.assert(write_remain.len > read_remain.len); -// this.last_write += @truncate(u32, write_remain.len); -// list.len += @intCast(u32, element_count - remain.len); - -// // faster path: stack allocated buffer -// if (write_remain.len <= 4096) { -// var buf: [4096]u8 = undefined; -// const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain); -// std.debug.assert(@as(usize, result.written) == write_remain.len); -// std.debug.assert(@as(usize, result.read) == read_remain.len); -// @memcpy(write_remain.ptr, &buf, write_remain.len); -// } else { -// // slow path -// var temp_buf = try allocator.dupe(u8, read_remain); -// defer allocator.free(temp_buf); -// const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf); - -// std.debug.assert(@as(usize, result.written) == write_remain.len); -// std.debug.assert(@as(usize, result.read) == read_remain.len); -// } - -// return list; -// } -// fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { -// var list = list_; -// var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]); -// const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2; -// log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len }); - -// var arraylist = list.listManaged(allocator); - -// // assert we have enough room -// const grow = element_count - list.slice()[this.last_write..].len; -// try arraylist.ensureUnusedCapacity(grow); -// list.update(arraylist); -// var write_remain = arraylist.items.ptr[this.last_write .. 
arraylist.items.len + grow]; -// this.last_write += @truncate(u32, grow); -// list.len += @intCast(u32, grow); - -// var buf: [4096]u8 = undefined; - -// if (element_count < buf.len) { -// const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain); -// std.debug.assert(@as(usize, result.written * 2) == write_remain.len); -// std.debug.assert(@as(usize, result.read) == remain.len); -// @memcpy(write_remain.ptr, &buf, write_remain.len); -// } else { -// // slow path -// var temp_buf = try allocator.alloc(u16, remain.len); -// @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len); -// defer allocator.free(temp_buf); -// const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf); -// std.debug.assert(@as(usize, result.written * 2) == write_remain.len); -// std.debug.assert(@as(usize, result.read) == remain.len); -// } - -// return list; -// } -// pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) { -// var list = list_; -// log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len }); - -// { -// switch (this.last_encoding) { -// .latin1 => { -// list = try this.flushLatin1(list, allocator); -// this.last_write = list.len; -// var arraylist = list.listManaged(allocator); -// var bytes = std.mem.sliceAsBytes(str); -// try arraylist.ensureUnusedCapacity(bytes.len); -// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); -// this.last_encoding = .utf16; -// list.update(arraylist); -// list.len += @intCast(u32, bytes.len); -// return list; -// }, -// .bytes, .utf16 => |enc| { -// if (enc == .bytes) { -// this.last_write = list.len; -// this.last_encoding = .utf16; -// } -// var arraylist = list.listManaged(allocator); -// var bytes = std.mem.sliceAsBytes(str); -// try arraylist.ensureUnusedCapacity(bytes.len); -// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, 
bytes.len); -// list.update(arraylist); -// list.len += @intCast(u32, bytes.len); -// }, -// } -// } - -// return list; -// } - -// pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) { -// if (this.last_encoding == .bytes) { -// std.debug.assert(this.last_write == list_.len); -// return list_; -// } - -// var list = list_; -// switch (this.last_encoding) { -// .utf16 => { -// list = try this.flushUTF16(list_, allocator); -// this.last_write = list.len; -// this.last_encoding = .bytes; -// }, -// .latin1 => { -// list = try this.flushLatin1(list_, allocator); -// this.last_write = list.len; -// this.last_encoding = .bytes; -// }, -// .bytes => unreachable, -// } - -// return list; -// } - -// pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) { -// var list = list_; -// log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str }); - -// { -// switch (this.last_encoding) { -// .utf16 => { -// list = try this.flushUTF16(list, allocator); -// this.last_write = list.len; -// var arraylist = list.listManaged(allocator); -// var bytes = std.mem.sliceAsBytes(str); -// try arraylist.ensureUnusedCapacity(bytes.len); -// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); -// this.last_encoding = .latin1; -// list.update(arraylist); -// list.len += @intCast(u32, bytes.len); -// return list; -// }, -// .bytes, .latin1 => |enc| { -// if (enc == .bytes) { -// this.last_write = list.len; -// this.last_encoding = .latin1; -// } -// var arraylist = list.listManaged(allocator); -// var bytes = std.mem.sliceAsBytes(str); -// try arraylist.ensureUnusedCapacity(bytes.len); -// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); -// list.update(arraylist); -// list.len += @intCast(u32, bytes.len); -// }, -// } -// } - -// return list; -// } - -// pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: 
std.mem.Allocator) !BabyList(u8) { -// var list = list_; -// log("writeBytes({any}, {any})", .{ .delayer = this, .str = str }); - -// { -// switch (this.last_encoding) { -// .utf16 => { -// list = try this.flushUTF16(list, allocator); -// }, -// .latin1 => { -// list = try this.flushLatin1(list, allocator); -// }, -// else => {}, -// } - -// var arraylist = list.listManaged(allocator); -// var bytes = std.mem.sliceAsBytes(str); -// try arraylist.ensureUnusedCapacity(bytes.len); -// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len); -// list.update(arraylist); -// list.len += @intCast(u32, bytes.len); -// this.last_write = list.len; -// this.last_encoding = .bytes; -// } - -// return list; -// } -// }; - /// This is like ArrayList except it stores the length and capacity as u32 /// In practice, it is very unusual to have lengths above 4 GB /// diff --git a/src/bun.js/bindings/bun-simdutf.zig b/src/bun.js/bindings/bun-simdutf.zig index 531e9c3ef..4d05c4ce1 100644 --- a/src/bun.js/bindings/bun-simdutf.zig +++ b/src/bun.js/bindings/bun-simdutf.zig @@ -1,3 +1,5 @@ +const JSC = @import("bun").JSC; + pub const SIMDUTFResult = extern struct { status: Status, count: usize = 0, @@ -237,6 +239,7 @@ pub const length = struct { pub const from = struct { pub const utf16 = struct { pub fn le(input: []const u16) usize { + JSC.markBinding(@src()); return simdutf__utf8_length_from_utf16le(input.ptr, input.len); } pub fn be(input: []const u16) usize { |
