aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Ciro Spaciari <ciro.spaciari@gmail.com> 2023-01-30 17:21:23 -0300
committerGravatar GitHub <noreply@github.com> 2023-01-30 12:21:23 -0800
commitaa10799d8a9a69b828e36cd9d295f6d5867fb511 (patch)
treefde30e5b530ce25acb417e766e96ee3710eedcc3
parentec2c16fefa8b98efaa1ccf84f18eea0a12c1c9ef (diff)
downloadbun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.tar.gz
bun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.tar.zst
bun-aa10799d8a9a69b828e36cd9d295f6d5867fb511.zip
fix utf16le fill and utf8 partial write of utf16 (#1943)
-rw-r--r--src/bun.js/bindings/JSBuffer.cpp2
-rw-r--r--src/bun.js/node/buffer.zig16
-rw-r--r--src/bun.js/webcore/encoding.zig46
-rw-r--r--src/napi/napi.zig4
-rw-r--r--src/string_immutable.zig48
5 files changed, 88 insertions, 28 deletions
diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp
index 8436e24e7..10002b664 100644
--- a/src/bun.js/bindings/JSBuffer.cpp
+++ b/src/bun.js/bindings/JSBuffer.cpp
@@ -466,7 +466,7 @@ static inline JSC::EncodedJSValue jsBufferByteLengthFromStringAndEncoding(JSC::J
}
if (str->length() == 0)
- RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(-1)));
+ RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(0)));
int64_t written = 0;
diff --git a/src/bun.js/node/buffer.zig b/src/bun.js/node/buffer.zig
index 5637e45b6..8ede45f5d 100644
--- a/src/bun.js/node/buffer.zig
+++ b/src/bun.js/node/buffer.zig
@@ -28,37 +28,37 @@ pub const BufferVectorized = struct {
const written = switch (encoding) {
JSC.Node.Encoding.utf8 => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.utf8),
JSC.Node.Encoding.ascii => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.ascii),
JSC.Node.Encoding.latin1 => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.latin1),
JSC.Node.Encoding.buffer => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.buffer),
JSC.Node.Encoding.utf16le,
JSC.Node.Encoding.ucs2,
=> if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.utf16le),
JSC.Node.Encoding.base64 => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.base64),
JSC.Node.Encoding.base64url => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.base64url),
JSC.Node.Encoding.hex => if (str.is16Bit())
- JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.hex)
+ JSC.WebCore.Encoder.writeU16(str.utf16SliceAligned().ptr, str.utf16SliceAligned().len, buf.ptr, buf.len, JSC.Node.Encoding.hex, true)
else
JSC.WebCore.Encoder.writeU8(str.slice().ptr, str.slice().len, buf.ptr, buf.len, JSC.Node.Encoding.hex),
};
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 59c3f3866..d0b4bdd9a 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -688,14 +688,14 @@ pub const Encoder = struct {
}
export fn Bun__encoding__writeUTF16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, encoding: u8) i64 {
return switch (@intToEnum(JSC.Node.Encoding, encoding)) {
- .utf8 => writeU16(input, len, to, to_len, .utf8),
- .latin1 => writeU16(input, len, to, to_len, .ascii),
- .ascii => writeU16(input, len, to, to_len, .ascii),
- .ucs2 => writeU16(input, len, to, to_len, .utf16le),
- .utf16le => writeU16(input, len, to, to_len, .utf16le),
- .base64 => writeU16(input, len, to, to_len, .base64),
- .base64url => writeU16(input, len, to, to_len, .base64url),
- .hex => writeU16(input, len, to, to_len, .hex),
+ .utf8 => writeU16(input, len, to, to_len, .utf8, false),
+ .latin1 => writeU16(input, len, to, to_len, .ascii, false),
+ .ascii => writeU16(input, len, to, to_len, .ascii, false),
+ .ucs2 => writeU16(input, len, to, to_len, .utf16le, false),
+ .utf16le => writeU16(input, len, to, to_len, .utf16le, false),
+ .base64 => writeU16(input, len, to, to_len, .base64, false),
+ .base64url => writeU16(input, len, to, to_len, .base64url, false),
+ .hex => writeU16(input, len, to, to_len, .hex, false),
else => unreachable,
};
}
@@ -882,6 +882,9 @@ pub const Encoder = struct {
},
// encode latin1 into UTF16
JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
+ Output.println("writeU8 ucs2/utf16 {any} {any}", .{ len, to_len});
+ Output.flush();
+
if (to_len < 2)
return 0;
@@ -954,7 +957,7 @@ pub const Encoder = struct {
}
}
- pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
+ pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) i64 {
if (len == 0)
return 0;
@@ -969,14 +972,23 @@ pub const Encoder = struct {
},
// string is already encoded, just need to copy the data
JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
- const bytes_input_len = len * 2;
- const written = @min(bytes_input_len, to_len);
- if (written < 2) return 0;
-
- const fixed_len = (written / 2) * 2;
- const input_u8 = @ptrCast([*]const u8, input);
- strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]);
- return @intCast(i64, fixed_len);
+ if(allow_partial_write) {
+ const bytes_input_len = len * 2;
+ const written = @min(bytes_input_len, to_len);
+ const input_u8 = @ptrCast([*]const u8, input);
+ strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..written]);
+ return @intCast(i64, written);
+ } else {
+ const bytes_input_len = len * 2;
+ const written = @min(bytes_input_len, to_len);
+ if (written < 2) return 0;
+
+ const fixed_len = (written / 2) * 2;
+ const input_u8 = @ptrCast([*]const u8, input);
+ strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]);
+ return @intCast(i64, fixed_len);
+ }
+
},
JSC.Node.Encoding.hex => {
diff --git a/src/napi/napi.zig b/src/napi/napi.zig
index a65143a09..046ad36af 100644
--- a/src/napi/napi.zig
+++ b/src/napi/napi.zig
@@ -340,7 +340,7 @@ pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf
if (zig_str.is16Bit()) {
const utf16 = zig_str.utf16SliceAligned();
- const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1);
+ const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1, false);
if (wrote < 0) {
return .generic_failure;
}
@@ -404,7 +404,7 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p
if (zig_str.is16Bit()) {
const utf16 = zig_str.utf16SliceAligned();
- const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8);
+ const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8, false);
if (wrote < 0) {
return .generic_failure;
}
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index e949892e1..8cc2ab7b9 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -2545,6 +2545,54 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
const width: usize = replacement.utf8Width();
if (width > remaining.len) {
ended_on_non_ascii = width > 1;
+ switch (width) { // code point does not fit in `remaining`: emit only the leading UTF-8 bytes that do
+ 2 => {
+ if (remaining.len > 0) {
+ // 2-byte sequence with room for 1 byte: only the lead byte is written
+ remaining[0] = @truncate(u8, 0xC0 | (replacement.code_point >> 6));
+ remaining = remaining[remaining.len..];
+ }
+ },
+ 3 => {
+ // 3-byte sequence with room for 1 or 2 bytes: only those bytes are written
+ switch (remaining.len) {
+ 1 => {
+ remaining[0] = @truncate(u8, 0xE0 | (replacement.code_point >> 12));
+ remaining = remaining[remaining.len..];
+ },
+ 2 => {
+ remaining[0] = @truncate(u8, 0xE0 | (replacement.code_point >> 12));
+ remaining[1] = @truncate(u8, 0x80 | ((replacement.code_point >> 6) & 0x3F)); // mask first: `&` and `|` share one precedence level in Zig
+ remaining = remaining[remaining.len..];
+ },
+ else => {},
+ }
+
+ },
+ 4 => {
+ // 4-byte sequence with room for 1 to 3 bytes: only those bytes are written
+ switch (remaining.len) {
+ 1 => {
+ remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18));
+ remaining = remaining[remaining.len..];
+ },
+ 2 => {
+ remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18));
+ remaining[1] = @truncate(u8, 0x80 | ((replacement.code_point >> 12) & 0x3F));
+ remaining = remaining[remaining.len..];
+ },
+ 3 => {
+ remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18));
+ remaining[1] = @truncate(u8, 0x80 | ((replacement.code_point >> 12) & 0x3F));
+ remaining[2] = @truncate(u8, 0x80 | ((replacement.code_point >> 6) & 0x3F)); // third byte lives at index 2; index 3 is out of bounds when len == 3
+ remaining = remaining[remaining.len..];
+ },
+ else => {},
+ }
+ },
+
+ else => {},
+ }
break;
}