aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Dylan Conway <35280289+dylan-conway@users.noreply.github.com> 2023-02-01 18:48:09 -0800
committer GitHub <noreply@github.com> 2023-02-01 18:48:09 -0800
commit 3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99 (patch)
tree b775f00e684da35bb58e1f4d951df88e6e0dc733 /src
parent 76f3c9c07b1db01ec4d0ae5361f0b1a1030ae528 (diff)
download bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.tar.gz
bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.tar.zst
bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.zip
fix text encoding for utf8 (#1967)
* use character
* replacement character
* also test encoding decoded points
* increase length by 1
Diffstat (limited to 'src')
-rw-r--r-- src/bun.js/webcore/encoding.zig 14
-rw-r--r-- src/string_immutable.zig 4
2 files changed, 13 insertions, 5 deletions
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 9725073a2..6729cc4de 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -99,6 +99,13 @@ pub const TextEncoder = struct {
// max utf16 -> utf8 length
if (slice.len <= buf.len / 4) {
const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice);
+ if (result.read == 0 or result.written == 0) {
+ const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, 3);
+ const array_buffer = uint8array.asArrayBuffer(globalThis).?;
+ const replacement_char = [_]u8{ 239, 191, 189 };
+ @memcpy(array_buffer.slice().ptr, &replacement_char, replacement_char.len);
+ return uint8array;
+ }
const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
std.debug.assert(result.written <= buf.len);
std.debug.assert(result.read == slice.len);
@@ -214,8 +221,11 @@ pub const TextEncoder = struct {
) u64 {
var output = buf_ptr[0..buf_len];
const input = input_ptr[0..input_len];
- const result: strings.EncodeIntoResult =
- strings.copyUTF16IntoUTF8(output, []const u16, input);
+ const result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input);
+ if (result.read == 0 or result.written == 0) {
+ const replacement_char = [_]u8{ 239, 191, 189 };
+ @memcpy(buf_ptr, &replacement_char, replacement_char.len);
+ }
const sized: [2]u32 = .{ result.read, result.written };
return @bitCast(u64, sized);
}
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 8cc2ab7b9..cf6f6126c 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -2481,9 +2481,8 @@ const latin1_to_utf16_conversion_table = [256]u16{
};
pub fn latin1ToCodepointBytesAssumeNotASCII(char: u32) [2]u8 {
- const as_utf16 = latin1ToCodepointBytesAssumeNotASCII16(char);
var bytes = [4]u8{ 0, 0, 0, 0 };
- _ = encodeWTF8Rune(&bytes, @intCast(i32, as_utf16));
+ _ = encodeWTF8Rune(&bytes, @intCast(i32, char));
return bytes[0..2].*;
}
@@ -2567,7 +2566,6 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
},
else => {},
}
-
},
4 => {
//only 1 to 3 written