diff options
-rw-r--r-- | src/baby_list.zig | 22 | ||||
-rw-r--r-- | src/string_immutable.zig | 25 |
2 files changed, 39 insertions, 8 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig index 298727c0b..9876f5083 100644 --- a/src/baby_list.zig +++ b/src/baby_list.zig @@ -372,16 +372,32 @@ pub fn BabyList(comptime Type: type) type { var list_ = this.listManaged(allocator); const initial = this.len; - { + outer: { defer this.update(list_); - try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, str)); + const trimmed = bun.simdutf.trim.utf16(str); + if (trimmed.len == 0) + break :outer; + const available_len = (list_.capacity - list_.items.len); + + // maximum UTF-8 length is 3 times the UTF-16 length + 2 + // only do the pass over the input length if we may not have enough space + const out_len = if (available_len <= (trimmed.len * 3 + 2)) + bun.simdutf.length.utf8.from.utf16.le(trimmed) + else + str.len; + + if (out_len == 0) + break :outer; + + // intentionally over-allocate a little + try list_.ensureTotalCapacity(list_.items.len + out_len); var remain = str; while (remain.len > 0) { const orig_len = list_.items.len; var slice_ = list_.items.ptr[orig_len..list_.capacity]; - const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain); + const result = strings.copyUTF16IntoUTF8WithBuffer(slice_, []const u16, remain, trimmed, out_len); remain = remain[result.read..]; list_.items.len += @as(usize, result.written); if (result.read == 0 or result.written == 0) break; diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 646917840..76b8b7073 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -2419,18 +2419,33 @@ pub fn latin1ToCodepointBytesAssumeNotASCII16(char: u32) u16 { } pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeIntoResult { - var remaining = buf; - var utf16_remaining = utf16; - var ended_on_non_ascii = false; - if (comptime Type == []const u16) { if (bun.FeatureFlags.use_simdutf) { - const trimmed = bun.simdutf.trim.utf16(utf16_remaining); + if (utf16.len == 0) + 
return .{ .read = 0, .written = 0 }; + const trimmed = bun.simdutf.trim.utf16(utf16); + if (trimmed.len == 0) + return .{ .read = 0, .written = 0 }; + const out_len = if (buf.len <= (trimmed.len * 3 + 2)) bun.simdutf.length.utf8.from.utf16.le(trimmed) else buf.len; + return copyUTF16IntoUTF8WithBuffer(buf, Type, utf16, trimmed, out_len); + } + } + + return copyUTF16IntoUTF8WithBuffer(buf, Type, utf16, utf16, utf16.len); +} + +pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, trimmed: Type, out_len: usize) EncodeIntoResult { + var remaining = buf; + var utf16_remaining = utf16; + var ended_on_non_ascii = false; + + if (comptime Type == []const u16) { + if (bun.FeatureFlags.use_simdutf) { log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len }); if (remaining.len >= out_len) { |