aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/string_immutable.zig 28
1 file changed, 9 insertions, 19 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 16caa133f..8f3859a34 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1283,26 +1283,16 @@ pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement {
pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
var list = list_;
-
- var remaining_input = utf16;
- var start: usize = 0;
-
- const replacement_char = [_]u8{ 239, 191, 189 };
- var result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]);
- list.items.len = result.count;
- while (result.status == .surrogate) {
- try list.ensureUnusedCapacity(3);
- list.items.len += 3;
- start += result.count;
-
- list.items[start..][0..replacement_char.len].* = replacement_char;
- remaining_input = remaining_input[result.count + 1 ..];
- start += replacement_char.len;
-
- result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]);
- list.items.len += result.count;
+ const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
+ utf16,
+ list.items.ptr[0..list.capacity],
+ );
+ if (result.status == .surrogate) {
+ // Slow path: simdutf reported invalid UTF-16 (.surrogate), so reconvert the whole input without simdutf.
+ return toUTF8ListWithTypeBun(list, Type, utf16);
}
+ list.items.len = result.count;
return list;
}
@@ -1338,7 +1328,7 @@ pub fn toUTF8ListWithTypeBun(list_: std.ArrayList(u8), comptime Type: type, utf1
const to_copy = utf16_remaining[0..i];
utf16_remaining = utf16_remaining[i..];
- const replacement = utf16Codepoint(Type, utf16_remaining);
+ const replacement = utf16CodepointWithFFFD(Type, utf16_remaining);
utf16_remaining = utf16_remaining[replacement.len..];
const count: usize = replacement.utf8Width();