aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Dylan Conway <dylan.conway567@gmail.com> 2022-12-12 13:41:55 -0800
committer: Dylan Conway <dylan.conway567@gmail.com> 2022-12-12 13:41:55 -0800
commit: bbc2dacd840709f2fdf932ae27c978078f348ac6 (patch)
tree: 781017e67fba24dff67ce1a0109d85b49b55be6b /src
parent: 0bb9493e4700b630e19abf48bd2b6bc7db417d9c (diff)
download: bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.gz
bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.tar.zst
bun-bbc2dacd840709f2fdf932ae27c978078f348ac6.zip
markbinding for utf8 length from utf16le
Diffstat (limited to 'src')
-rw-r--r-- src/baby_list.zig 226
-rw-r--r-- src/bun.js/bindings/bun-simdutf.zig 3
2 files changed, 3 insertions, 226 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index 9876f5083..9f1580294 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -3,232 +3,6 @@ const Environment = @import("./env.zig");
const strings = @import("./string_immutable.zig");
const bun = @import("bun");
-// -- Failed Experiment --
-// Delete this code later
-// -- Failed Experiment --
-// Writing tons of < 8 byte chunks is kind of expensive
-// because we have to loop through them to copy and then again to encode
-// It would be faster if we could use SIMD
-// but the behavior is out of our control
-// so instead, we copy the _unencoded_ bytes to a buffer
-// Then, just before we send it over the network, we encode it, usually in-place
-// The caveat is if the encoding changes
-// pub const Delayer = struct {
-// last_write: u32 = 0,
-// last_encoding: Encoding = Encoding.bytes,
-
-// const log = bun.Output.scoped(.Delayer, true);
-// pub const Encoding = enum {
-// bytes,
-// utf16,
-// latin1,
-// };
-
-// fn flushLatin1(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-// var list = list_;
-// var remain = list.slice()[this.last_write..];
-// const element_count = strings.elementLengthLatin1IntoUTF8([]const u8, remain);
-// log("flushLatin1({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
-// // common case: nothing to do, it's just ascii
-// if (element_count == remain.len) {
-// this.last_write += @truncate(u32, remain.len);
-
-// return list;
-// }
-
-// std.debug.assert(element_count > remain.len);
-// var arraylist = list.listManaged(allocator);
-// // assert we have enough room
-// try arraylist.ensureUnusedCapacity(element_count - remain.len);
-
-// list.update(arraylist);
-// var read_remain = arraylist.items.ptr[this.last_write..arraylist.items.len];
-// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + (element_count - remain.len)];
-// std.debug.assert(write_remain.len > 0);
-// std.debug.assert(read_remain.len > 0);
-// std.debug.assert(write_remain.len > read_remain.len);
-// this.last_write += @truncate(u32, write_remain.len);
-// list.len += @intCast(u32, element_count - remain.len);
-
-// // faster path: stack allocated buffer
-// if (write_remain.len <= 4096) {
-// var buf: [4096]u8 = undefined;
-// const result = strings.copyLatin1IntoUTF8(&buf, []const u8, remain);
-// std.debug.assert(@as(usize, result.written) == write_remain.len);
-// std.debug.assert(@as(usize, result.read) == read_remain.len);
-// @memcpy(write_remain.ptr, &buf, write_remain.len);
-// } else {
-// // slow path
-// var temp_buf = try allocator.dupe(u8, read_remain);
-// defer allocator.free(temp_buf);
-// const result = strings.copyLatin1IntoUTF8(write_remain, []const u8, temp_buf);
-
-// std.debug.assert(@as(usize, result.written) == write_remain.len);
-// std.debug.assert(@as(usize, result.read) == read_remain.len);
-// }
-
-// return list;
-// }
-// fn flushUTF16(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-// var list = list_;
-// var remain = std.mem.bytesAsSlice(u16, list.slice()[this.last_write..]);
-// const element_count = strings.elementLengthUTF16IntoUTF8(@TypeOf(remain), remain) * 2;
-// log("flushUTF16({any}, {any})", .{ .element_count = element_count, .remain = remain.len });
-
-// var arraylist = list.listManaged(allocator);
-
-// // assert we have enough room
-// const grow = element_count - list.slice()[this.last_write..].len;
-// try arraylist.ensureUnusedCapacity(grow);
-// list.update(arraylist);
-// var write_remain = arraylist.items.ptr[this.last_write .. arraylist.items.len + grow];
-// this.last_write += @truncate(u32, grow);
-// list.len += @intCast(u32, grow);
-
-// var buf: [4096]u8 = undefined;
-
-// if (element_count < buf.len) {
-// const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(remain), remain);
-// std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
-// std.debug.assert(@as(usize, result.read) == remain.len);
-// @memcpy(write_remain.ptr, &buf, write_remain.len);
-// } else {
-// // slow path
-// var temp_buf = try allocator.alloc(u16, remain.len);
-// @memcpy(std.mem.sliceAsBytes(temp_buf).ptr, std.mem.sliceAsBytes(remain).ptr, std.mem.sliceAsBytes(remain).len);
-// defer allocator.free(temp_buf);
-// const result = strings.copyUTF16IntoUTF8(write_remain, @TypeOf(temp_buf), temp_buf);
-// std.debug.assert(@as(usize, result.written * 2) == write_remain.len);
-// std.debug.assert(@as(usize, result.read) == remain.len);
-// }
-
-// return list;
-// }
-// pub fn writeUTF16(this: *Delayer, list_: BabyList(u8), str: []const u16, allocator: std.mem.Allocator) !BabyList(u8) {
-// var list = list_;
-// log("writeUTF16({any}, {any})", .{ .delayer = this, .len = str.len });
-
-// {
-// switch (this.last_encoding) {
-// .latin1 => {
-// list = try this.flushLatin1(list, allocator);
-// this.last_write = list.len;
-// var arraylist = list.listManaged(allocator);
-// var bytes = std.mem.sliceAsBytes(str);
-// try arraylist.ensureUnusedCapacity(bytes.len);
-// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-// this.last_encoding = .utf16;
-// list.update(arraylist);
-// list.len += @intCast(u32, bytes.len);
-// return list;
-// },
-// .bytes, .utf16 => |enc| {
-// if (enc == .bytes) {
-// this.last_write = list.len;
-// this.last_encoding = .utf16;
-// }
-// var arraylist = list.listManaged(allocator);
-// var bytes = std.mem.sliceAsBytes(str);
-// try arraylist.ensureUnusedCapacity(bytes.len);
-// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-// list.update(arraylist);
-// list.len += @intCast(u32, bytes.len);
-// },
-// }
-// }
-
-// return list;
-// }
-
-// pub fn flush(this: *Delayer, list_: BabyList(u8), allocator: std.mem.Allocator) !BabyList(u8) {
-// if (this.last_encoding == .bytes) {
-// std.debug.assert(this.last_write == list_.len);
-// return list_;
-// }
-
-// var list = list_;
-// switch (this.last_encoding) {
-// .utf16 => {
-// list = try this.flushUTF16(list_, allocator);
-// this.last_write = list.len;
-// this.last_encoding = .bytes;
-// },
-// .latin1 => {
-// list = try this.flushLatin1(list_, allocator);
-// this.last_write = list.len;
-// this.last_encoding = .bytes;
-// },
-// .bytes => unreachable,
-// }
-
-// return list;
-// }
-
-// pub fn writeLatin1(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
-// var list = list_;
-// log("writeLatin1({any}, {s})", .{ .delayer = this, .str = str });
-
-// {
-// switch (this.last_encoding) {
-// .utf16 => {
-// list = try this.flushUTF16(list, allocator);
-// this.last_write = list.len;
-// var arraylist = list.listManaged(allocator);
-// var bytes = std.mem.sliceAsBytes(str);
-// try arraylist.ensureUnusedCapacity(bytes.len);
-// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-// this.last_encoding = .latin1;
-// list.update(arraylist);
-// list.len += @intCast(u32, bytes.len);
-// return list;
-// },
-// .bytes, .latin1 => |enc| {
-// if (enc == .bytes) {
-// this.last_write = list.len;
-// this.last_encoding = .latin1;
-// }
-// var arraylist = list.listManaged(allocator);
-// var bytes = std.mem.sliceAsBytes(str);
-// try arraylist.ensureUnusedCapacity(bytes.len);
-// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-// list.update(arraylist);
-// list.len += @intCast(u32, bytes.len);
-// },
-// }
-// }
-
-// return list;
-// }
-
-// pub fn writeBytes(this: *Delayer, list_: BabyList(u8), str: []const u8, allocator: std.mem.Allocator) !BabyList(u8) {
-// var list = list_;
-// log("writeBytes({any}, {any})", .{ .delayer = this, .str = str });
-
-// {
-// switch (this.last_encoding) {
-// .utf16 => {
-// list = try this.flushUTF16(list, allocator);
-// },
-// .latin1 => {
-// list = try this.flushLatin1(list, allocator);
-// },
-// else => {},
-// }
-
-// var arraylist = list.listManaged(allocator);
-// var bytes = std.mem.sliceAsBytes(str);
-// try arraylist.ensureUnusedCapacity(bytes.len);
-// @memcpy(arraylist.items.ptr + arraylist.items.len, bytes.ptr, bytes.len);
-// list.update(arraylist);
-// list.len += @intCast(u32, bytes.len);
-// this.last_write = list.len;
-// this.last_encoding = .bytes;
-// }
-
-// return list;
-// }
-// };
-
/// This is like ArrayList except it stores the length and capacity as u32
/// In practice, it is very unusual to have lengths above 4 GB
///
diff --git a/src/bun.js/bindings/bun-simdutf.zig b/src/bun.js/bindings/bun-simdutf.zig
index 531e9c3ef..4d05c4ce1 100644
--- a/src/bun.js/bindings/bun-simdutf.zig
+++ b/src/bun.js/bindings/bun-simdutf.zig
@@ -1,3 +1,5 @@
+const JSC = @import("bun").JSC;
+
pub const SIMDUTFResult = extern struct {
status: Status,
count: usize = 0,
@@ -237,6 +239,7 @@ pub const length = struct {
pub const from = struct {
pub const utf16 = struct {
pub fn le(input: []const u16) usize {
+ JSC.markBinding(@src());
return simdutf__utf8_length_from_utf16le(input.ptr, input.len);
}
pub fn be(input: []const u16) usize {