diff options
author | 2023-07-03 13:16:45 -0700 | |
---|---|---|
committer | 2023-07-03 13:16:57 -0700 | |
commit | 983039a18afccb2f7d78dfdb06724d1ea58edde6 (patch) | |
tree | ef2380e26e558747ae2d25af4ada78513ff0787f | |
parent | a7a01bd52f20e7908f06d4de9a1814902b838a4b (diff) | |
download | bun-983039a18afccb2f7d78dfdb06724d1ea58edde6.tar.gz bun-983039a18afccb2f7d78dfdb06724d1ea58edde6.tar.zst bun-983039a18afccb2f7d78dfdb06724d1ea58edde6.zip |
Fixes #3508
-rw-r--r-- | src/bun.js/bindings/bindings.zig | 22 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 8 | ||||
-rw-r--r-- | src/napi/napi.zig | 94 | ||||
-rw-r--r-- | src/string.zig | 52 |
4 files changed, 105 insertions, 71 deletions
diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig index 777860d3c..277172b81 100644 --- a/src/bun.js/bindings/bindings.zig +++ b/src/bun.js/bindings/bindings.zig @@ -291,7 +291,27 @@ pub const ZigString = extern struct { return this.len * 2; } - /// Count the number of code points in the string. + pub fn utf16ByteLength(this: ZigString) usize { + if (this.isUTF8()) { + return bun.simdutf.length.utf16.from.utf8.le(this.slice()); + } + + if (this.is16Bit()) { + return this.len * 2; + } + + return JSC.WebCore.Encoder.byteLengthU8(this.slice().ptr, this.slice().len, .utf16le); + } + + pub fn latin1ByteLength(this: ZigString) usize { + if (this.isUTF8()) { + @panic("TODO"); + } + + return this.len; + } + + /// Count the number of bytes in the UTF-8 version of the string. /// This function is slow. Use maxUITF8ByteLength() to get a quick estimate pub fn utf8ByteLength(this: ZigString) usize { if (this.isUTF8()) { diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 061a25eed..bb1180acb 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -985,6 +985,14 @@ pub const Encoder = struct { } } + pub fn encodeIntoFrom16(input: []const u16, to: []u8, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) !usize { + return writeU16(input.ptr, input.len, to.ptr, to.len, encoding, allow_partial_write); + } + + pub fn encodeIntoFrom8(input: []const u8, to: []u8, comptime encoding: JSC.Node.Encoding) !usize { + return writeU8(input.ptr, input.len, to.ptr, to.len, encoding); + } + pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding, comptime allow_partial_write: bool) !usize { if (len == 0) return 0; diff --git a/src/napi/napi.zig b/src/napi/napi.zig index 439319489..d9c7b5993 100644 --- a/src/napi/napi.zig +++ b/src/napi/napi.zig @@ -289,7 +289,10 @@ pub export fn napi_create_string_utf8(env: napi_env, str: [*]const u8, length: u log("napi_create_string_utf8: {s}", .{slice}); - setNapiValue(result, JSC.ZigString.fromUTF8(slice).toValueGC(env)); + var string = bun.String.create(slice); + defer string.deref(); + + setNapiValue(result, string.toJS(env)); return .ok; } pub export fn napi_create_string_utf16(env: napi_env, str: [*]const char16_t, length: usize, result: *napi_value) napi_status { @@ -340,18 +343,14 @@ inline fn maybeAppendNull(ptr: anytype, doit: bool) void { pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf_ptr: [*c]u8, bufsize: usize, result: *usize) napi_status { log("napi_get_value_string_latin1", .{}); defer value.ensureStillAlive(); - const zig_str = value.getZigString(env); + const str = value.toBunString(env); var buf = buf_ptr orelse { - result.* = if (!zig_str.is16Bit()) - zig_str.len - else - // should be same length if valid latin1 - strings.elementLengthUTF16IntoUTF8([]const u16, zig_str.utf16SliceAligned()); + result.* = str.latin1ByteLength(); return .ok; }; - if (zig_str.len == 0) { + if (str.isEmpty()) { result.* = 0; buf[0] = 0; @@ -367,18 +366,7 @@ pub export fn napi_get_value_string_latin1(env: napi_env, value: napi_value, buf return .ok; } } - - if (zig_str.is16Bit()) { - const utf16 = zig_str.utf16SliceAligned(); - const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .latin1, false) catch return genericFailure(); - maybeAppendNull(&buf[wrote], bufsize == 0); - // if zero terminated, report the length of the string without the null - result.* = @intCast(@TypeOf(result.*), wrote); - return .ok; - } - const to_copy = @min(zig_str.len, buf_.len); - @memcpy(buf[0..to_copy], zig_str.slice().ptr[0..to_copy]); - buf[to_copy] = 0; + const to_copy = str.encodeInto(buf_, .latin1) catch unreachable; // if zero terminated, report the length of the string without the null result.* = to_copy; return .ok; @@ -399,24 +387,22 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p return .string_expected; } - const zig_str = value.getZigString(env); - var buf = buf_ptr orelse { + const str = value.toBunString(env); + + if (str.isEmpty()) { if (result_ptr) |result| { - result.* = if (!zig_str.is16Bit()) - zig_str.len - else - JSC.WebCore.Encoder.byteLengthU16(zig_str.utf16SliceAligned().ptr, zig_str.utf16SliceAligned().len, .utf8); + result.* = 0; } - return .ok; - }; + } - if (zig_str.len == 0) { + var buf = buf_ptr orelse { if (result_ptr) |result| { - result.* = 0; + result.* = str.utf8ByteLength(); } + return .ok; - } + }; var buf_ = buf[0..bufsize]; @@ -430,44 +416,29 @@ pub export fn napi_get_value_string_utf8(env: napi_env, value: napi_value, buf_p } } - if (zig_str.is16Bit()) { - const utf16 = zig_str.utf16SliceAligned(); - const wrote = JSC.WebCore.Encoder.writeU16(utf16.ptr, utf16.len, buf, buf_.len, .utf8, false) catch return genericFailure(); - buf[wrote] = 0; - if (result_ptr) |result| { - result.* = @intCast(@TypeOf(result.*), wrote); - } - - return .ok; - } + const written = str.encodeInto(buf_, .utf8) catch unreachable; - const to_copy = @min(zig_str.len, buf_.len); - @memcpy(buf[0..to_copy], zig_str.slice().ptr[0..to_copy]); - buf[to_copy] = 0; if (result_ptr) |result| { - result.* = @intCast(@TypeOf(result.*), to_copy); + result.* = written; } - log("napi_get_value_string_utf8: {s}", .{buf[0..to_copy]}); + log("napi_get_value_string_utf8: {s}", .{buf[0..written]}); return .ok; } pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_ptr: [*c]char16_t, bufsize: usize, result_ptr: ?*usize) napi_status { log("napi_get_value_string_utf16", .{}); defer value.ensureStillAlive(); - const zig_str = value.getZigString(env); + const str = value.toBunString(env); var buf = buf_ptr orelse { if (result_ptr) |result| { - result.* = if (zig_str.is16Bit()) - zig_str.len - else - JSC.WebCore.Encoder.byteLengthU16(zig_str.utf16SliceAligned().ptr, zig_str.utf16SliceAligned().len, .latin1); + result.* = str.utf16ByteLength(); } return .ok; }; - if (zig_str.len == 0) { + if (str.isEmpty()) { if (result_ptr) |result| { result.* = 0; } @@ -487,20 +458,7 @@ pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_ return .ok; } } - - if (!zig_str.is16Bit()) { - const slice = zig_str.slice(); - const encode_into_result = strings.copyLatin1IntoUTF16([]char16_t, buf_, []const u8, slice); - buf[@intCast(usize, encode_into_result.written)] = 0; - - if (result_ptr) |result| { - result.* = encode_into_result.written; - } - return .ok; - } - - const to_copy = @min(zig_str.len, buf_.len) * 2; - @memcpy(std.mem.sliceAsBytes(buf_)[0..to_copy], std.mem.sliceAsBytes(zig_str.utf16SliceAligned())[0..to_copy]); + const to_copy = (str.encodeInto(std.mem.sliceAsBytes(buf_), .utf16le) catch unreachable) >> 1; buf[to_copy] = 0; // if zero terminated, report the length of the string without the null if (result_ptr) |result| { @@ -509,9 +467,9 @@ pub export fn napi_get_value_string_utf16(env: napi_env, value: napi_value, buf_ return .ok; } -pub export fn napi_coerce_to_bool(_: napi_env, value: napi_value, result: *napi_value) napi_status { +pub export fn napi_coerce_to_bool(env: napi_env, value: napi_value, result: *napi_value) napi_status { log("napi_coerce_to_bool", .{}); - result.* = JSValue.jsBoolean(value.to(bool)); + result.* = JSValue.jsBoolean(value.coerce(bool, env)); return .ok; } pub export fn napi_coerce_to_number(env: napi_env, value: napi_value, result: *napi_value) napi_status { diff --git a/src/string.zig b/src/string.zig index 3c0c99ce5..166a0a6f7 100644 --- a/src/string.zig +++ b/src/string.zig @@ -160,6 +160,17 @@ pub const WTFStringImplStruct = extern struct { } } + pub fn utf16ByteLength(this: WTFStringImpl) usize { + // All latin1 characters fit in a single UTF-16 code unit. + return this.length() * 2; + } + + pub fn latin1ByteLength(this: WTFStringImpl) usize { + // Not all UTF-16 characters fit are representable in latin1. + // Those get truncated? + return this.length(); + } + pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator { return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr }; } @@ -286,6 +297,31 @@ pub const String = extern struct { return this; } + pub fn utf8ByteLength(this: String) usize { + return switch (this.tag) { + .WTFStringImpl => this.value.WTFStringImpl.utf8ByteLength(), + .ZigString => this.value.ZigString.utf8ByteLength(), + .StaticZigString => this.value.StaticZigString.utf8ByteLength(), + .Dead, .Empty => 0, + }; + } + + pub fn utf16ByteLength(this: String) usize { + return switch (this.tag) { + .WTFStringImpl => this.value.WTFStringImpl.utf16ByteLength(), + .StaticZigString, .ZigString => this.value.ZigString.utf16ByteLength(), + .Dead, .Empty => 0, + }; + } + + pub fn latin1ByteLength(this: String) usize { + return switch (this.tag) { + .WTFStringImpl => this.value.WTFStringImpl.latin1ByteLength(), + .StaticZigString, .ZigString => this.value.ZigString.latin1ByteLength(), + .Dead, .Empty => 0, + }; + } + pub fn initWithType(comptime Type: type, value: Type) String { switch (comptime Type) { ZigString => return String{ .tag = .ZigString, .value = .{ .ZigString = value } }, @@ -431,7 +467,7 @@ pub const String = extern struct { } pub fn isUTF8(self: String) bool { - if (!self.tag == .ZigString or self.tag == .StaticZigString) + if (!(self.tag == .ZigString or self.tag == .StaticZigString)) return false; return self.value.ZigString.isUTF8(); @@ -466,11 +502,23 @@ pub const String = extern struct { return !self.value.WTFStringImpl.is8Bit(); if (self.tag == .ZigString or self.tag == .StaticZigString) - return self.value.ZigString.isUTF16(); + return self.value.ZigString.is16Bit(); return false; } + pub fn encodeInto(self: String, out: []u8, comptime enc: JSC.Node.Encoding) !usize { + if (self.isUTF16()) { + return JSC.WebCore.Encoder.encodeIntoFrom16(self.utf16(), out, enc, true); + } + + if (self.isUTF8()) { + @panic("TODO"); + } + + return JSC.WebCore.Encoder.encodeIntoFrom8(self.latin1(), out, enc); + } + pub inline fn utf8(self: String) []const u8 { if (comptime bun.Environment.allow_assert) std.debug.assert(self.canBeUTF8()); |