diff options
author | 2023-02-01 18:48:09 -0800 | |
---|---|---|
committer | 2023-02-01 18:48:09 -0800 | |
commit | 3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99 (patch) | |
tree | b775f00e684da35bb58e1f4d951df88e6e0dc733 /src | |
parent | 76f3c9c07b1db01ec4d0ae5361f0b1a1030ae528 (diff) | |
download | bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.tar.gz bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.tar.zst bun-3c23f9ad5787bc9e3bd61b7df4c0cdb0fb9f7b99.zip |
fix text encoding for utf8 (#1967)
* use character
* replacement character
* also test encoding decoded points
* increase length by 1
Diffstat (limited to 'src')
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 14 | ||||
-rw-r--r-- | src/string_immutable.zig | 4 |
2 files changed, 13 insertions, 5 deletions
```diff
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 9725073a2..6729cc4de 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -99,6 +99,13 @@ pub const TextEncoder = struct {
         // max utf16 -> utf8 length
         if (slice.len <= buf.len / 4) {
             const result = strings.copyUTF16IntoUTF8(&buf, @TypeOf(slice), slice);
+            if (result.read == 0 or result.written == 0) {
+                const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, 3);
+                const array_buffer = uint8array.asArrayBuffer(globalThis).?;
+                const replacement_char = [_]u8{ 239, 191, 189 };
+                @memcpy(array_buffer.slice().ptr, &replacement_char, replacement_char.len);
+                return uint8array;
+            }
             const uint8array = JSC.JSValue.createUninitializedUint8Array(globalThis, result.written);
             std.debug.assert(result.written <= buf.len);
             std.debug.assert(result.read == slice.len);
@@ -214,8 +221,11 @@ pub const TextEncoder = struct {
     ) u64 {
         var output = buf_ptr[0..buf_len];
         const input = input_ptr[0..input_len];
-        const result: strings.EncodeIntoResult =
-            strings.copyUTF16IntoUTF8(output, []const u16, input);
+        const result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input);
+        if (result.read == 0 or result.written == 0) {
+            const replacement_char = [_]u8{ 239, 191, 189 };
+            @memcpy(buf_ptr, &replacement_char, replacement_char.len);
+        }
         const sized: [2]u32 = .{ result.read, result.written };
         return @bitCast(u64, sized);
     }
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 8cc2ab7b9..cf6f6126c 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -2481,9 +2481,8 @@ const latin1_to_utf16_conversion_table = [256]u16{
 };
 
 pub fn latin1ToCodepointBytesAssumeNotASCII(char: u32) [2]u8 {
-    const as_utf16 = latin1ToCodepointBytesAssumeNotASCII16(char);
     var bytes = [4]u8{ 0, 0, 0, 0 };
-    _ = encodeWTF8Rune(&bytes, @intCast(i32, as_utf16));
+    _ = encodeWTF8Rune(&bytes, @intCast(i32, char));
     return bytes[0..2].*;
 }
 
@@ -2567,7 +2566,6 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
                     },
                     else => {},
                 }
-
             },
             4 => {
                 //only 1 to 3 written
```