diff options
-rw-r--r-- | src/string_immutable.zig | 28 |
1 files changed, 9 insertions, 19 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 16caa133f..8f3859a34 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1283,26 +1283,16 @@ pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement { pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) { var list = list_; - - var remaining_input = utf16; - var start: usize = 0; - - const replacement_char = [_]u8{ 239, 191, 189 }; - var result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]); - list.items.len = result.count; - while (result.status == .surrogate) { - try list.ensureUnusedCapacity(3); - list.items.len += 3; - start += result.count; - - list.items[start..][0..replacement_char.len].* = replacement_char; - remaining_input = remaining_input[result.count + 1 ..]; - start += replacement_char.len; - - result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]); - list.items.len += result.count; + var result = bun.simdutf.convert.utf16.to.utf8.with_errors.le( + utf16, + list.items.ptr[0..list.capacity], + ); + if (result.status == .surrogate) { + // Slow path: there was invalid UTF-16, so we need to convert it without simdutf. + return toUTF8ListWithTypeBun(list, Type, utf16); } + list.items.len = result.count; return list; } @@ -1338,7 +1328,7 @@ pub fn toUTF8ListWithTypeBun(list_: std.ArrayList(u8), comptime Type: type, utf1 const to_copy = utf16_remaining[0..i]; utf16_remaining = utf16_remaining[i..]; - const replacement = utf16Codepoint(Type, utf16_remaining); + const replacement = utf16CodepointWithFFFD(Type, utf16_remaining); utf16_remaining = utf16_remaining[replacement.len..]; const count: usize = replacement.utf8Width(); |