markbinding for utf8 length from utf16le

author: Dylan Conway <dylan.conway567@gmail.com> 2022-12-12 13:41:55 -0800
committer: Dylan Conway <dylan.conway567@gmail.com> 2022-12-12 13:41:55 -0800
commit: bbc2dacd840709f2fdf932ae27c978078f348ac6 (patch)
tree: 781017e67fba24dff67ce1a0109d85b49b55be6b /src
parent: 0bb9493e4700b630e19abf48bd2b6bc7db417d9c (diff)
download: bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.gz
bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.zst
bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.zip
2 files changed, 3 insertions, 226 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index 9876f5083..9f1580294 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -3,232 +3,6 @@ const Environment = @import("./env.zig");
 const strings = @import("./string_immutable.zig");
 const bun = @import("bun");
 
-// -- Failed Experiment --
-// Delete this code later
-// -- Failed Experiment --
-// Writing tons of < 8 byte chunks is kind of expensive
-// because we have to loop through them to copy and then again to encode
-// It would be faster if we could use SIMD
-// but the behavior is out of our control
-// so instead, we copy the _unencoded_ bytes to a buffer
-// Then, just before we send it over the network, we encode it, usually in-place
-// The caveat is if the encoding changes
-// pub const Delayer = struct {
-//     last_write: u32 = 0,
-//     last_encoding: Encoding = Encoding.bytes,
-
-//     const log = bun.Output.scoped(.Delayer, true);
-//     pub const Encoding = enum {
-//         bytes,
-//         utf16,
-//         latin1,
-//     };
-
-//     fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-//         var list = list_;
-//         var remain = list.slice()[this.last_write..];
-//         const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain);
-//         log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
-//         // common case: nothing to do, it's just ascii
-//         if (element_count == remain.len) {
-//             this.last_write += @truncate(u32, remain.len);
-
-//             return list;
-//         }
-
-//         std.debug.assert(element_count > remain.len);
-//         var arraylist = list.listManaged(allocator);
-//         // assert we have enough room
-//         try arraylist.ensureUnusedCapacity(element_count - remain.len);
-
-//         list.update(arraylist);
-//         var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len];
-//         var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + (element_count - remain.len)];
-//         std.debug.assert(write_remain.len > 0);
-//         std.debug.assert(read_remain.len > 0);
-//         std.debug.assert(write_remain.len > read_remain.len);
-//         this.last_write += @truncate(u32, write_remain.len);
-//         list.len += @intCast(u32, element_count - remain.len);
-
-//         // faster path: stack allocated buffer
-//         if (write_remain.len <= 4096) {
-//             var buf: [4096]u8 = undefined;
-//             const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain);
-//             std.debug.assert(@as(usize, result.written) == write_remain.len);
-//             std.debug.assert(@as(usize, result.read) == read_remain.len);
-//             @memcpy(write_remain.ptr, &buf, write_remain.len);
-//         } else {
-//             // slow path
-//             var temp_buf = try allocator.dupe(u8, read_remain);
-//             defer allocator.free(temp_buf);
-//             const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf);
-
-//             std.debug.assert(@as(usize, result.written) == write_remain.len);
-//             std.debug.assert(@as(usize, result.read) == read_remain.len);
-//         }
-
-//         return list;
-//     }
-//     fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-//         var list = list_;
-//         var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]);
-//         const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2;
-//         log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
-
-//         var arraylist = list.listManaged(allocator);
-
-//         // assert we have enough room
-//         const grow = element_count - list.slice()[this.last_write..].len;
-//         try arraylist.ensureUnusedCapacity(grow);
-//         list.update(arraylist);
-//         var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + grow];
-//         this.last_write += @truncate(u32, grow);
-//         list.len += @intCast(u32, grow);
-
-//         var buf: [4096]u8 = undefined;
-
-//         if (element_count < buf.len) {
-//             const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain);
-//             std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
-//             std.debug.assert(@as(usize, result.read) == remain.len);
-//             @memcpy(write_remain.ptr, &buf, write_remain.len);
-//         } else {
-//             // slow path
-//             var temp_buf = try allocator.alloc(u16, remain.len);
-//             @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len);
-//             defer allocator.free(temp_buf);
-//             const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf);
-//             std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
-//             std.debug.assert(@as(usize, result.read) == remain.len);
-//         }
-
-//         return list;
-//     }
-//     pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) {
-//         var list = list_;
-//         log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len });
-
-//         {
-//             switch (this.last_encoding) {
-//                 .latin1 => {
-//                     list = try this.flushLatin1(list, allocator);
-//                     this.last_write = list.len;
-//                     var arraylist = list.listManaged(allocator);
-//                     var bytes = std.mem.sliceAsBytes(str);
-//                     try arraylist.ensureUnusedCapacity(bytes.len);
-//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-//                     this.last_encoding = .utf16;
-//                     list.update(arraylist);
-//                     list.len += @intCast(u32, bytes.len);
-//                     return list;
-//                 },
-//                 .bytes, .utf16 => |enc| {
-//                     if (enc == .bytes) {
-//                         this.last_write = list.len;
-//                         this.last_encoding = .utf16;
-//                     }
-//                     var arraylist = list.listManaged(allocator);
-//                     var bytes = std.mem.sliceAsBytes(str);
-//                     try arraylist.ensureUnusedCapacity(bytes.len);
-//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-//                     list.update(arraylist);
-//                     list.len += @intCast(u32, bytes.len);
-//                 },
-//             }
-//         }
-
-//         return list;
-//     }
-
-//     pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-//         if (this.last_encoding == .bytes) {
-//             std.debug.assert(this.last_write == list_.len);
-//             return list_;
-//         }
-
-//         var list = list_;
-//         switch (this.last_encoding) {
-//             .utf16 => {
-//                 list = try this.flushUTF16(list_, allocator);
-//                 this.last_write = list.len;
-//                 this.last_encoding = .bytes;
-//             },
-//             .latin1 => {
-//                 list = try this.flushLatin1(list_, allocator);
-//                 this.last_write = list.len;
-//                 this.last_encoding = .bytes;
-//             },
-//             .bytes => unreachable,
-//         }
-
-//         return list;
-//     }
-
-//     pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
-//         var list = list_;
-//         log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str });
-
-//         {
-//             switch (this.last_encoding) {
-//                 .utf16 => {
-//                     list = try this.flushUTF16(list, allocator);
-//                     this.last_write = list.len;
-//                     var arraylist = list.listManaged(allocator);
-//                     var bytes = std.mem.sliceAsBytes(str);
-//                     try arraylist.ensureUnusedCapacity(bytes.len);
-//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-//                     this.last_encoding = .latin1;
-//                     list.update(arraylist);
-//                     list.len += @intCast(u32, bytes.len);
-//                     return list;
-//                 },
-//                 .bytes, .latin1 => |enc| {
-//                     if (enc == .bytes) {
-//                         this.last_write = list.len;
-//                         this.last_encoding = .latin1;
-//                     }
-//                     var arraylist = list.listManaged(allocator);
-//                     var bytes = std.mem.sliceAsBytes(str);
-//                     try arraylist.ensureUnusedCapacity(bytes.len);
-//                     @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-//                     list.update(arraylist);
-//                     list.len += @intCast(u32, bytes.len);
-//                 },
-//             }
-//         }
-
-//         return list;
-//     }
-
-//     pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
-//         var list = list_;
-//         log("writeBytes({any}, {any})", .{ .delayer = this, .str = str });
-
-//         {
-//             switch (this.last_encoding) {
-//                 .utf16 => {
-//                     list = try this.flushUTF16(list, allocator);
-//                 },
-//                 .latin1 => {
-//                     list = try this.flushLatin1(list, allocator);
-//                 },
-//                 else => {},
-//             }
-
-//             var arraylist = list.listManaged(allocator);
-//             var bytes = std.mem.sliceAsBytes(str);
-//             try arraylist.ensureUnusedCapacity(bytes.len);
-//             @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-//             list.update(arraylist);
-//             list.len += @intCast(u32, bytes.len);
-//             this.last_write = list.len;
-//             this.last_encoding = .bytes;
-//         }
-
-//         return list;
-//     }
-// };
-
 /// This is like ArrayList except it stores the length and capacity as u32
 /// In practice, it is very unusual to have lengths above 4 GB
 ///
diff --git a/src/bun.js/bindings/bun-simdutf.zig b/src/bun.js/bindings/bun-simdutf.zig
index 531e9c3ef..4d05c4ce1 100644
--- a/src/bun.js/bindings/bun-simdutf.zig
+++ b/src/bun.js/bindings/bun-simdutf.zig
@@ -1,3 +1,5 @@
+const JSC = @import("bun").JSC;
+
 pub const SIMDUTFResult = extern struct {
     status: Status,
     count: usize = 0,
@@ -237,6 +239,7 @@ pub const length = struct {
         pub const from = struct {
             pub const utf16 = struct {
                 pub fn le(input: []const u16) usize {
+                    JSC.markBinding(@src());
                     return simdutf__utf8_length_from_utf16le(input.ptr, input.len);
                 }
                 pub fn be(input: []const u16) usize {
author	Dylan Conway <dylan.conway567@gmail.com>	2022-12-12 13:41:55 -0800
committer	Dylan Conway <dylan.conway567@gmail.com>	2022-12-12 13:41:55 -0800
commit	bbc2dacd840709f2fdf932ae27c978078f348ac6 (patch)
tree	781017e67fba24dff67ce1a0109d85b49b55be6b /src
parent	0bb9493e4700b630e19abf48bd2b6bc7db417d9c (diff)
download	bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.gz bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.zst bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.zip