aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/baby_list.zig 22
-rw-r--r-- src/string_immutable.zig 25
2 files changed, 39 insertions, 8 deletions
diff --git a/src/baby_list.zig b/src/baby_list.zig
index 298727c0b..9876f5083 100644
--- a/src/baby_list.zig
+++ b/src/baby_list.zig
@@ -372,16 +372,32 @@ pub fn BabyList(comptime Type: type) type {
var list_ = this.listManaged(allocator);
const initial = this.len;
- {
+ outer: {
defer this.update(list_);
- try list_.ensureTotalCapacityPrecise(list_.items.len + strings.elementLengthUTF16IntoUTF8([]const u16, str));
+ const trimmed = bun.simdutf.trim.utf16(str);
+ if (trimmed.len == 0)
+ break :outer;
+ const available_len = (list_.capacity - list_.items.len);
+
+ // maximum UTF-8 length is 3 times the UTF-16 length + 2
+ // only do the pass over the input length if we may not have enough space
+ const out_len = if (available_len <= (trimmed.len * 3 + 2))
+ bun.simdutf.length.utf8.from.utf16.le(trimmed)
+ else
+ str.len;
+
+ if (out_len == 0)
+ break :outer;
+
+ // intentionally over-allocate a little
+ try list_.ensureTotalCapacity(list_.items.len + out_len);
var remain = str;
while (remain.len > 0) {
const orig_len = list_.items.len;
var slice_ = list_.items.ptr[orig_len..list_.capacity];
- const result = strings.copyUTF16IntoUTF8(slice_, []const u16, remain);
+ const result = strings.copyUTF16IntoUTF8WithBuffer(slice_, []const u16, remain, trimmed, out_len);
remain = remain[result.read..];
list_.items.len += @as(usize, result.written);
if (result.read == 0 or result.written == 0) break;
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 646917840..76b8b7073 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -2419,18 +2419,33 @@ pub fn latin1ToCodepointBytesAssumeNotASCII16(char: u32) u16 {
}
pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeIntoResult {
- var remaining = buf;
- var utf16_remaining = utf16;
- var ended_on_non_ascii = false;
-
if (comptime Type == []const u16) {
if (bun.FeatureFlags.use_simdutf) {
- const trimmed = bun.simdutf.trim.utf16(utf16_remaining);
+ if (utf16.len == 0)
+ return .{ .read = 0, .written = 0 };
+ const trimmed = bun.simdutf.trim.utf16(utf16);
+ if (trimmed.len == 0)
+ return .{ .read = 0, .written = 0 };
+
const out_len = if (buf.len <= (trimmed.len * 3 + 2))
bun.simdutf.length.utf8.from.utf16.le(trimmed)
else
buf.len;
+ return copyUTF16IntoUTF8WithBuffer(buf, Type, utf16, trimmed, out_len);
+ }
+ }
+
+ return copyUTF16IntoUTF8WithBuffer(buf, Type, utf16, utf16, utf16.len);
+}
+
+pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, trimmed: Type, out_len: usize) EncodeIntoResult {
+ var remaining = buf;
+ var utf16_remaining = utf16;
+ var ended_on_non_ascii = false;
+
+ if (comptime Type == []const u16) {
+ if (bun.FeatureFlags.use_simdutf) {
log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len });
if (remaining.len >= out_len) {