aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
author: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-03-18 16:57:55 -0700
committer: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-03-18 16:57:55 -0700
commit: 7216bfe0165e619efaee963399d49f2f37ba76f2 (patch)
tree: d117408883f3225e9cad88a42427037ab1207c22 /src/string_immutable.zig
parent: 1c85e94977ef880f94ee7f57b8d542c0b8ccff89 (diff)
download: bun-7216bfe0165e619efaee963399d49f2f37ba76f2.tar.gz
bun-7216bfe0165e619efaee963399d49f2f37ba76f2.tar.zst
bun-7216bfe0165e619efaee963399d49f2f37ba76f2.zip
Fix out of bounds access
Repro'd in Buffer tests
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r-- src/string_immutable.zig 28
1 files changed, 9 insertions, 19 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 16caa133f..8f3859a34 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1283,26 +1283,16 @@ pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement {
pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) {
var list = list_;
-
- var remaining_input = utf16;
- var start: usize = 0;
-
- const replacement_char = [_]u8{ 239, 191, 189 };
- var result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]);
- list.items.len = result.count;
- while (result.status == .surrogate) {
- try list.ensureUnusedCapacity(3);
- list.items.len += 3;
- start += result.count;
-
- list.items[start..][0..replacement_char.len].* = replacement_char;
- remaining_input = remaining_input[result.count + 1 ..];
- start += replacement_char.len;
-
- result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remaining_input, list.items.ptr[start..list.capacity]);
- list.items.len += result.count;
+ var result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(
+ utf16,
+ list.items.ptr[0..list.capacity],
+ );
+ if (result.status == .surrogate) {
+ // Slow path: there was invalid UTF-16, so we need to convert it without simdutf.
+ return toUTF8ListWithTypeBun(list, Type, utf16);
}
+ list.items.len = result.count;
return list;
}
@@ -1338,7 +1328,7 @@ pub fn toUTF8ListWithTypeBun(list_: std.ArrayList(u8), comptime Type: type, utf1
const to_copy = utf16_remaining[0..i];
utf16_remaining = utf16_remaining[i..];
- const replacement = utf16Codepoint(Type, utf16_remaining);
+ const replacement = utf16CodepointWithFFFD(Type, utf16_remaining);
utf16_remaining = utf16_remaining[replacement.len..];
const count: usize = replacement.utf8Width();