diff options
author | 2023-01-30 17:21:23 -0300 | |
---|---|---|
committer | 2023-01-30 12:21:23 -0800 | |
commit | aa10799d8a9a69b828e36cd9d295f6d5867fb511 (patch) | |
tree | fde30e5b530ce25acb417e766e96ee3710eedcc3 | |
parent | ec2c16fefa8b98efaa1ccf84f18eea0a12c1c9ef (diff) | |
download | bun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.tar.gz bun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.tar.zst bun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.zip |
fix utf16le fill and utf8 partial write of utf16 (#1943)
-rw-r--r-- | src/bun.js/bindings/JSBuffer.cpp | 2 | ||||
-rw-r--r-- | src/bun.js/node/buffer.zig | 16 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 46 | ||||
-rw-r--r-- | src/napi/napi.zig | 4 | ||||
-rw-r--r-- | src/string_immutable.zig | 48 |
5 files changed, 88 insertions, 28 deletions
diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index 8436e24e7..10002b664 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -466,7 +466,7 @@ static inline JSC::EncodedJSValue jsBufferByteLengthFromStringAndEncoding(JSC::J } if (str->length() == 0) - RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(-1))); + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(0))); int64_t written = 0; diff --git a/src/bun.js/node/buffer.zig b/src/bun.js/node/buffer.zig index 5637e45b6..8ede45f5d 100644 --- a/src/bun.js/node/buffer.zig +++ b/src/bun.js/node/buffer.zig @@ -28,37 +28,37 @@ pub const BufferVectorized = struct { const written = switch (encoding) { JSC.Node.Encoding.utf8 => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8), JSC.Node.Encoding.ascii => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii), JSC.Node.Encoding.latin1 => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1), JSC.Node.Encoding.buffer => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer), JSC.Node.Encoding.utf16le, JSC.Node.Encoding.ucs2, => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le), JSC.Node.Encoding.base64 => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.base64), JSC.Node.Encoding.base64url => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url), JSC.Node.Encoding.hex => if (str.is16Bit()) - JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.hex) + JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.hex, true) else JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.hex), }; diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 59c3f3866..d0b4bdd9a 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -688,14 +688,14 @@ pub const Encoder = struct { } export fn Bun__encoding__writeUTF16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, encoding: u8) i64 { return switch (@intToEnum(JSC.Node.Encoding, encoding)) { - .utf8 => writeU16(input, len, to, to_len, .utf8), - .latin1 => writeU16(input, len, to, to_len, .ascii), - .ascii => writeU16(input, len, to, to_len, .ascii), - .ucs2 => writeU16(input, len, to, to_len, .utf16le), - .utf16le => writeU16(input, len, to, to_len, .utf16le), - .base64 => writeU16(input, len, to, to_len, .base64), - .base64url => writeU16(input, len, to, to_len, .base64url), - .hex => writeU16(input, len, to, to_len, .hex), + .utf8 => writeU16(input, len, to, to_len, .utf8, false), + .latin1 => writeU16(input, len, to, to_len, .ascii, false), + .ascii => writeU16(input, len, to, to_len, .ascii, false), + .ucs2 => writeU16(input, len, to, to_len, .utf16le, false), + .utf16le => writeU16(input, len, to, to_len, .utf16le, false), + .base64 => writeU16(input, len, to, to_len, .base64, false), + .base64url => writeU16(input, len, to, to_len, .base64url, false), + .hex => writeU16(input, len, to, to_len, .hex, false), else => unreachable, }; } @@ -882,6 +882,9 @@ pub const Encoder = struct { }, // encode latin1 into UTF16 JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + Output.println("writeU8 ucs2/utf16 {any} {any}", .{ len, to_len}); + Output.flush(); + if (to_len < 2) return 0; @@ -954,7 +957,7 @@ pub const Encoder = struct { } } - pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) i64 { if (len == 0) return 0; @@ -969,14 +972,23 @@ pub const Encoder = struct { }, // string is already encoded, just need to copy the data JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { - const bytes_input_len = len * 2; - const written = @min(bytes_input_len, to_len); - if (written < 2) return 0; - - const fixed_len = (written / 2) * 2; - const input_u8 = @ptrCast([*]const u8, input); - strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]); - return @intCast(i64, fixed_len); + if(allow_partial_write) { + const bytes_input_len = len * 2; + const written = @min(bytes_input_len, to_len); + const input_u8 = @ptrCast([*]const u8, input); + strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..written]); + return @intCast(i64, written); + } else { + const bytes_input_len = len * 2; + const written = @min(bytes_input_len, to_len); + if (written < 2) return 0; + + const fixed_len = (written / 2) * 2; + const input_u8 = @ptrCast([*]const u8, input); + strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]); + return @intCast(i64, fixed_len); + } + }, JSC.Node.Encoding.hex => { diff --git a/src/napi/napi.zig b/src/napi/napi.zig index a65143a09..046ad36af 100644 --- a/src/napi/napi.zig +++ b/src/napi/napi.zig @@ -340,7 +340,7 @@ pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf if (zig_str.is16Bit()) { const utf16 = zig_str.utf16SliceAligned(); - const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1); + const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1, false); if (wrote < 0) { return .generic_failure; } @@ -404,7 +404,7 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p if (zig_str.is16Bit()) { const utf16 = zig_str.utf16SliceAligned(); - const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8); + const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8, false); if (wrote < 0) { return .generic_failure; } diff --git a/src/string_immutable.zig b/src/string_immutable.zig index e949892e1..8cc2ab7b9 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -2545,6 +2545,54 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, const width: usize = replacement.utf8Width(); if (width > remaining.len) { ended_on_non_ascii = width > 1; + switch (width) { + 2 => { + if (remaining.len > 0) { + //only first will be written + remaining[0] = @truncate(u8, 0xC0 | (replacement.code_point >> 6)); + remaining = remaining[remaining.len..]; + } + }, + 3 => { + //only first to second written + switch (remaining.len) { + 1 => { + remaining[0] = @truncate(u8, 0xE0 | (replacement.code_point >> 12)); + remaining = remaining[remaining.len..]; + }, + 2 => { + remaining[0] = @truncate(u8, 0xE0 | (replacement.code_point >> 12)); + remaining[1] = @truncate(u8, 0x80 | (replacement.code_point >> 6) & 0x3F); + remaining = remaining[remaining.len..]; + }, + else => {}, + } + + }, + 4 => { + //only 1 to 3 written + switch (remaining.len) { + 1 => { + remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18)); + remaining = remaining[remaining.len..]; + }, + 2 => { + remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18)); + remaining[1] = @truncate(u8, 0x80 | (replacement.code_point >> 12) & 0x3F); + remaining = remaining[remaining.len..]; + }, + 3 => { + remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18)); + remaining[1] = @truncate(u8, 0x80 | (replacement.code_point >> 12) & 0x3F); + remaining[3] = @truncate(u8, 0x80 | (replacement.code_point >> 0) & 0x3F); + remaining = remaining[remaining.len..]; + }, + else => {}, + } + }, + + else => {}, + } break; } |