aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-07-03 13:16:45 -0700
committerGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2023-07-03 13:16:57 -0700
commit983039a18afccb2f7d78dfdb06724d1ea58edde6 (patch)
treeef2380e26e558747ae2d25af4ada78513ff0787f
parenta7a01bd52f20e7908f06d4de9a1814902b838a4b (diff)
downloadbun-983039a18afccb2f7d78dfdb06724d1ea58edde6.tar.gz
bun-983039a18afccb2f7d78dfdb06724d1ea58edde6.tar.zst
bun-983039a18afccb2f7d78dfdb06724d1ea58edde6.zip
Fixes #3508
-rw-r--r--src/bun.js/bindings/bindings.zig22
-rw-r--r--src/bun.js/webcore/encoding.zig8
-rw-r--r--src/napi/napi.zig94
-rw-r--r--src/string.zig52
4 files changed, 105 insertions, 71 deletions
diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig
index 777860d3c..277172b81 100644
--- a/src/bun.js/bindings/bindings.zig
+++ b/src/bun.js/bindings/bindings.zig
@@ -291,7 +291,27 @@ pub const ZigString = extern struct {
return this.len * 2;
}
- /// Count the number of code points in the string.
+ /// Number of bytes this string needs when encoded as UTF-16LE.
+ ///
+ /// The result is a byte count (two bytes per UTF-16 code unit), matching
+ /// the 16-bit branch below. Callers that want code units must halve it.
+ pub fn utf16ByteLength(this: ZigString) usize {
+     if (this.isUTF8()) {
+         // simdutf reports how many UTF-16 code units the UTF-8 input
+         // would produce, not bytes, so scale by two to stay consistent
+         // with the 16-bit branch.
+         return bun.simdutf.length.utf16.from.utf8.le(this.slice()) * 2;
+     }
+
+     if (this.is16Bit()) {
+         return this.len * 2;
+     }
+
+     // Neither UTF-8 nor 16-bit: hand the raw 8-bit bytes to the encoder.
+     const bytes = this.slice();
+     return JSC.WebCore.Encoder.byteLengthU8(bytes.ptr, bytes.len, .utf16le);
+ }
+
+ /// Byte length of this string in Latin-1 form.
+ ///
+ /// UTF-8 tagged strings are not handled yet and hit `@panic("TODO")`;
+ /// every other string reports `len` unchanged.
+ pub fn latin1ByteLength(this: ZigString) usize {
+     return if (this.isUTF8())
+         @panic("TODO")
+     else
+         this.len;
+ }
+
+ /// Count the number of bytes in the UTF-8 version of the string.
/// This function is slow. Use maxUTF8ByteLength() to get a quick estimate
pub fn utf8ByteLength(this: ZigString) usize {
if (this.isUTF8()) {
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 061a25eed..bb1180acb 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -985,6 +985,14 @@ pub const Encoder = struct {
}
}
+ /// Slice-based convenience wrapper around `writeU16`.
+ ///
+ /// Forwards the pointer and length of both `input` and `to`, together with
+ /// `encoding` and `allow_partial_write`, and returns whatever `writeU16`
+ /// returns (including its error).
+ pub fn encodeIntoFrom16(input: []const u16, to: []u8, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) !usize {
+     const source_ptr = input.ptr;
+     const dest_ptr = to.ptr;
+     return writeU16(source_ptr, input.len, dest_ptr, to.len, encoding, allow_partial_write);
+ }
+
+ /// Slice-based convenience wrapper around `writeU8`.
+ ///
+ /// Forwards the pointer and length of both `input` and `to`, together with
+ /// `encoding`, and returns whatever `writeU8` returns (including its error).
+ pub fn encodeIntoFrom8(input: []const u8, to: []u8, comptime encoding: JSC.Node.Encoding) !usize {
+     const source_ptr = input.ptr;
+     const dest_ptr = to.ptr;
+     return writeU8(source_ptr, input.len, dest_ptr, to.len, encoding);
+ }
+
pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) !usize {
if (len == 0)
return 0;
diff --git a/src/napi/napi.zig b/src/napi/napi.zig
index 439319489..d9c7b5993 100644
--- a/src/napi/napi.zig
+++ b/src/napi/napi.zig
@@ -289,7 +289,10 @@ pub export fn napi_create_string_utf8(env: napi_env, str: [*]const u8, length: u
log("napi_create_string_utf8: {s}", .{slice});
- setNapiValue(result, JSC.ZigString.fromUTF8(slice).toValueGC(env));
+ var string = bun.String.create(slice);
+ defer string.deref();
+
+ setNapiValue(result, string.toJS(env));
return .ok;
}
pub export fn napi_create_string_utf16(env: napi_env, str: [*]const char16_t, length: usize, result: *napi_value) napi_status {
@@ -340,18 +343,14 @@ inline fn maybeAppendNull(ptr: anytype, doit: bool) void {
pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf_ptr: [*c]u8, bufsize: usize, result: *usize) napi_status {
log("napi_get_value_string_latin1", .{});
defer value.ensureStillAlive();
- const zig_str = value.getZigString(env);
+ const str = value.toBunString(env);
var buf = buf_ptr orelse {
- result.* = if (!zig_str.is16Bit())
- zig_str.len
- else
- // should be same length if valid latin1
- strings.elementLengthUTF16IntoUTF8([]const u16, zig_str.utf16SliceAligned());
+ result.* = str.latin1ByteLength();
return .ok;
};
- if (zig_str.len == 0) {
+ if (str.isEmpty()) {
result.* = 0;
buf[0] = 0;
@@ -367,18 +366,7 @@ pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf
return .ok;
}
}
-
- if (zig_str.is16Bit()) {
- const utf16 = zig_str.utf16SliceAligned();
- const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1, false) catch return genericFailure();
- maybeAppendNull(&buf[wrote], bufsize == 0);
- // if zero terminated, report the length of the string without the null
- result.* = @intCast(@TypeOf(result.*), wrote);
- return .ok;
- }
- const to_copy = @min(zig_str.len, buf_.len);
- @memcpy(buf[0..to_copy], zig_str.slice().ptr[0..to_copy]);
- buf[to_copy] = 0;
+ const to_copy = str.encodeInto(buf_, .latin1) catch unreachable;
// if zero terminated, report the length of the string without the null
result.* = to_copy;
return .ok;
@@ -399,24 +387,22 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p
return .string_expected;
}
- const zig_str = value.getZigString(env);
- var buf = buf_ptr orelse {
+ const str = value.toBunString(env);
+
+ if (str.isEmpty()) {
if (result_ptr) |result| {
- result.* = if (!zig_str.is16Bit())
- zig_str.len
- else
- JSC.WebCore.Encoder.byteLengthU16(zig_str.utf16SliceAligned().ptr, zig_str.utf16SliceAligned().len, .utf8);
+ result.* = 0;
}
-
return .ok;
- };
+ }
- if (zig_str.len == 0) {
+ var buf = buf_ptr orelse {
if (result_ptr) |result| {
- result.* = 0;
+ result.* = str.utf8ByteLength();
}
+
return .ok;
- }
+ };
var buf_ = buf[0..bufsize];
@@ -430,44 +416,29 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p
}
}
- if (zig_str.is16Bit()) {
- const utf16 = zig_str.utf16SliceAligned();
- const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8, false) catch return genericFailure();
- buf[wrote] = 0;
- if (result_ptr) |result| {
- result.* = @intCast(@TypeOf(result.*), wrote);
- }
-
- return .ok;
- }
+ const written = str.encodeInto(buf_, .utf8) catch unreachable;
- const to_copy = @min(zig_str.len, buf_.len);
- @memcpy(buf[0..to_copy], zig_str.slice().ptr[0..to_copy]);
- buf[to_copy] = 0;
if (result_ptr) |result| {
- result.* = @intCast(@TypeOf(result.*), to_copy);
+ result.* = written;
}
- log("napi_get_value_string_utf8: {s}", .{buf[0..to_copy]});
+ log("napi_get_value_string_utf8: {s}", .{buf[0..written]});
return .ok;
}
pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_ptr: [*c]char16_t, bufsize: usize, result_ptr: ?*usize) napi_status {
log("napi_get_value_string_utf16", .{});
defer value.ensureStillAlive();
- const zig_str = value.getZigString(env);
+ const str = value.toBunString(env);
var buf = buf_ptr orelse {
if (result_ptr) |result| {
- result.* = if (zig_str.is16Bit())
- zig_str.len
- else
- JSC.WebCore.Encoder.byteLengthU16(zig_str.utf16SliceAligned().ptr, zig_str.utf16SliceAligned().len, .latin1);
+ result.* = str.utf16ByteLength();
}
return .ok;
};
- if (zig_str.len == 0) {
+ if (str.isEmpty()) {
if (result_ptr) |result| {
result.* = 0;
}
@@ -487,20 +458,7 @@ pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_
return .ok;
}
}
-
- if (!zig_str.is16Bit()) {
- const slice = zig_str.slice();
- const encode_into_result = strings.copyLatin1IntoUTF16([]char16_t, buf_, []const u8, slice);
- buf[@intCast(usize, encode_into_result.written)] = 0;
-
- if (result_ptr) |result| {
- result.* = encode_into_result.written;
- }
- return .ok;
- }
-
- const to_copy = @min(zig_str.len, buf_.len) * 2;
- @memcpy(std.mem.sliceAsBytes(buf_)[0..to_copy], std.mem.sliceAsBytes(zig_str.utf16SliceAligned())[0..to_copy]);
+ const to_copy = (str.encodeInto(std.mem.sliceAsBytes(buf_), .utf16le) catch unreachable) >> 1;
buf[to_copy] = 0;
// if zero terminated, report the length of the string without the null
if (result_ptr) |result| {
@@ -509,9 +467,9 @@ pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_
return .ok;
}
-pub export fn napi_coerce_to_bool(_: napi_env, value: napi_value, result: *napi_value) napi_status {
+pub export fn napi_coerce_to_bool(env: napi_env, value: napi_value, result: *napi_value) napi_status {
log("napi_coerce_to_bool", .{});
- result.* = JSValue.jsBoolean(value.to(bool));
+ result.* = JSValue.jsBoolean(value.coerce(bool, env));
return .ok;
}
pub export fn napi_coerce_to_number(env: napi_env, value: napi_value, result: *napi_value) napi_status {
diff --git a/src/string.zig b/src/string.zig
index 3c0c99ce5..166a0a6f7 100644
--- a/src/string.zig
+++ b/src/string.zig
@@ -160,6 +160,17 @@ pub const WTFStringImplStruct = extern struct {
}
}
+ /// Byte length of this string as UTF-16: two bytes per unit of `length()`.
+ pub fn utf16ByteLength(this: WTFStringImpl) usize {
+     // Every latin1 character fits in exactly one UTF-16 code unit, so the
+     // same doubling applies to both representations.
+     const code_units = this.length();
+     return code_units * 2;
+ }
+
+ /// Byte length of this string as latin1; returns `length()` as-is.
+ pub fn latin1ByteLength(this: WTFStringImpl) usize {
+ // Not all UTF-16 characters are representable in latin1.
+ // NOTE(review): presumably those get truncated when encoded — confirm.
+ return this.length();
+ }
+
pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator {
return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr };
}
@@ -286,6 +297,31 @@ pub const String = extern struct {
return this;
}
+ /// UTF-8 byte length of this string, delegated to whichever representation
+ /// `tag` selects. Dead and empty strings report 0.
+ pub fn utf8ByteLength(this: String) usize {
+     switch (this.tag) {
+         .Dead, .Empty => return 0,
+         .WTFStringImpl => return this.value.WTFStringImpl.utf8ByteLength(),
+         .ZigString => return this.value.ZigString.utf8ByteLength(),
+         .StaticZigString => return this.value.StaticZigString.utf8ByteLength(),
+     }
+ }
+
+ /// UTF-16 byte length of this string, delegated to whichever representation
+ /// `tag` selects. Dead and empty strings report 0.
+ pub fn utf16ByteLength(this: String) usize {
+     switch (this.tag) {
+         .Dead, .Empty => return 0,
+         .WTFStringImpl => return this.value.WTFStringImpl.utf16ByteLength(),
+         // Both Zig-backed tags are read through the `ZigString` field.
+         .StaticZigString, .ZigString => return this.value.ZigString.utf16ByteLength(),
+     }
+ }
+
+ /// Latin-1 byte length of this string, delegated to whichever representation
+ /// `tag` selects. Dead and empty strings report 0.
+ pub fn latin1ByteLength(this: String) usize {
+     switch (this.tag) {
+         .Dead, .Empty => return 0,
+         .WTFStringImpl => return this.value.WTFStringImpl.latin1ByteLength(),
+         // Both Zig-backed tags are read through the `ZigString` field.
+         .StaticZigString, .ZigString => return this.value.ZigString.latin1ByteLength(),
+     }
+ }
+
pub fn initWithType(comptime Type: type, value: Type) String {
switch (comptime Type) {
ZigString => return String{ .tag = .ZigString, .value = .{ .ZigString = value } },
@@ -431,7 +467,7 @@ pub const String = extern struct {
}
pub fn isUTF8(self: String) bool {
- if (!self.tag == .ZigString or self.tag == .StaticZigString)
+ if (!(self.tag == .ZigString or self.tag == .StaticZigString))
return false;
return self.value.ZigString.isUTF8();
@@ -466,11 +502,23 @@ pub const String = extern struct {
return !self.value.WTFStringImpl.is8Bit();
if (self.tag == .ZigString or self.tag == .StaticZigString)
- return self.value.ZigString.isUTF16();
+ return self.value.ZigString.is16Bit();
return false;
}
+ /// Encode this string into `out` using `enc` and return the encoder's result.
+ ///
+ /// - 16-bit strings go through `Encoder.encodeIntoFrom16` with partial
+ ///   writes allowed.
+ /// - UTF-8 strings are not supported yet and hit `@panic("TODO")`.
+ /// - Anything else is read via `latin1()` and goes through
+ ///   `Encoder.encodeIntoFrom8`.
+ pub fn encodeInto(self: String, out: []u8, comptime enc: JSC.Node.Encoding) !usize {
+     const Encoder = JSC.WebCore.Encoder;
+
+     if (self.isUTF16())
+         return Encoder.encodeIntoFrom16(self.utf16(), out, enc, true);
+
+     if (self.isUTF8())
+         @panic("TODO");
+
+     return Encoder.encodeIntoFrom8(self.latin1(), out, enc);
+ }
+
pub inline fn utf8(self: String) []const u8 {
if (comptime bun.Environment.allow_assert)
std.debug.assert(self.canBeUTF8());