diff options
author | 2022-04-26 01:12:28 -0700 | |
---|---|---|
committer | 2022-04-26 01:12:28 -0700 | |
commit | 77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d (patch) | |
tree | df930fa1bb1e7eb408762e8138aec580fed4e4e8 /src/javascript | |
parent | 6590d1f8bf09838e4530848af74990c5ba89eb81 (diff) | |
download | bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.gz bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.zst bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.zip |
Most of Buffer.toString
Diffstat (limited to 'src/javascript')
-rw-r--r-- | src/javascript/jsc/bindings/Buffer.h | 8 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/JSBuffer.cpp | 102 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/bindings.zig | 7 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/headers-handwritten.h | 1 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/wtf-bindings.cpp | 8 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/wtf-bindings.h | 3 | ||||
-rw-r--r-- | src/javascript/jsc/webcore/encoding.zig | 122 |
7 files changed, 242 insertions, 9 deletions
diff --git a/src/javascript/jsc/bindings/Buffer.h b/src/javascript/jsc/bindings/Buffer.h index ac57e4d25..8efc95787 100644 --- a/src/javascript/jsc/bindings/Buffer.h +++ b/src/javascript/jsc/bindings/Buffer.h @@ -10,6 +10,14 @@ #include "BufferEncodingType.h" #include "JavaScriptCore/GenericTypedArrayView.h" +extern "C" JSC__JSValue Bun__encoding__toStringUTF16(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringUTF8(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringASCII(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringLatin1(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringHex(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringURLSafeBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); + namespace WebCore { class Buffer final : public RefCounted<Buffer> { diff --git a/src/javascript/jsc/bindings/JSBuffer.cpp b/src/javascript/jsc/bindings/JSBuffer.cpp index f45fd2a9a..05cc1f23a 100644 --- a/src/javascript/jsc/bindings/JSBuffer.cpp +++ b/src/javascript/jsc/bindings/JSBuffer.cpp @@ -568,7 +568,107 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGl static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) { auto& vm = JSC::getVM(lexicalGlobalObject); - return JSC::JSValue::encode(jsUndefined()); + uint32_t offset = 0; + uint32_t length = castedThis->length(); + WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8; + + if (length == 0) + return JSC::JSValue::encode(JSC::jsEmptyString(vm)); + + auto scope = DECLARE_THROW_SCOPE(vm); + + switch (callFrame->argumentCount()) { + case 0: { + break; + } + case 2: + case 3: + case 1: { + JSC::JSValue arg1 = callFrame->uncheckedArgument(0); + std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1); + if (!encoded) { + throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"); + return JSC::JSValue::encode(jsUndefined()); + } + + encoding = encoded.value(); + if (callFrame->argumentCount() == 1) + break; + } + // any + case 5: { + JSC::JSValue arg2 = callFrame->uncheckedArgument(1); + int32_t ioffset = arg2.toInt32(lexicalGlobalObject); + if (ioffset < 0) { + throwTypeError(lexicalGlobalObject, scope, "Offset must be a positive integer"); + return JSC::JSValue::encode(jsUndefined()); + } + offset = static_cast<uint32_t>(ioffset); + + if (callFrame->argumentCount() == 2) + break; + } + + default: { + length = static_cast<uint32_t>(callFrame->argument(2).toInt32(lexicalGlobalObject)); + break; + } + } + + length -= std::min(offset, length); + + if (UNLIKELY(length == 0)) { + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm))); + } + + JSC::EncodedJSValue ret = 0; + + switch (encoding) { + case WebCore::BufferEncodingType::buffer: + case WebCore::BufferEncodingType::utf8: { + ret = Bun__encoding__toStringUTF8(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::latin1: + case WebCore::BufferEncodingType::ascii: { + ret = Bun__encoding__toStringASCII(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::ucs2: + case WebCore::BufferEncodingType::utf16le: { + ret = Bun__encoding__toStringUTF16(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::base64: { + ret = Bun__encoding__toStringBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::base64url: { + ret = Bun__encoding__toStringURLSafeBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::hex: { + ret = Bun__encoding__toStringHex(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + default: { + throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen"); + break; + } + } + + JSC::JSValue retValue = JSC::JSValue::decode(ret); + if (UNLIKELY(!retValue.isString())) { + scope.throwException(lexicalGlobalObject, retValue); + return JSC::JSValue::encode(jsUndefined()); + } + + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue)); } static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) { diff --git a/src/javascript/jsc/bindings/bindings.zig b/src/javascript/jsc/bindings/bindings.zig index f09d56e3e..2643abfed 100644 --- a/src/javascript/jsc/bindings/bindings.zig +++ b/src/javascript/jsc/bindings/bindings.zig @@ -3463,6 +3463,7 @@ pub const StringView = extern struct { pub const WTF = struct { extern fn WTF__copyLCharsFromUCharSource(dest: [*]u8, source: *const anyopaque, len: usize) void; + extern fn WTF__toBase64URLStringValue(bytes: [*]const u8, length: usize, globalObject: *JSGlobalObject) JSValue; /// This uses SSE2 instructions and/or ARM NEON to copy 16-bit characters efficiently /// See wtf/Text/ASCIIFastPath.h for details @@ -3472,6 +3473,12 @@ pub const WTF = struct { // This is any alignment WTF__copyLCharsFromUCharSource(destination, source.ptr, source.len); } + + /// Encode a byte array to a URL-safe base64 string for use with JS + /// Memory is managed by JavaScriptCore instead of us + pub fn toBase64URLStringValue(bytes: []const u8, globalObject: *JSGlobalObject) JSValue { + return WTF__toBase64URLStringValue(bytes.ptr, bytes.len, globalObject); + } }; pub const Callback = struct { diff --git a/src/javascript/jsc/bindings/headers-handwritten.h b/src/javascript/jsc/bindings/headers-handwritten.h index d955e3138..3d03187b7 100644 --- a/src/javascript/jsc/bindings/headers-handwritten.h +++ b/src/javascript/jsc/bindings/headers-handwritten.h @@ -217,4 +217,5 @@ extern "C" int64_t Bun__encoding__writeLatin1AsUTF8(const unsigned char* ptr, si extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len, unsigned char* to, size_t other_len); extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len); extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len); + #endif diff --git a/src/javascript/jsc/bindings/wtf-bindings.cpp b/src/javascript/jsc/bindings/wtf-bindings.cpp index 5dae85930..bc3b96f73 100644 --- a/src/javascript/jsc/bindings/wtf-bindings.cpp +++ b/src/javascript/jsc/bindings/wtf-bindings.cpp @@ -1,6 +1,14 @@ #include "wtf-bindings.h" +#include "wtf/text/Base64.h" extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length) { WTF::copyLCharsFromUCharSource(destination, source, length); +} + +extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject) +{ + WTF::String string = WTF::base64URLEncodeToString(reinterpret_cast<const LChar*>(bytes), static_cast<unsigned int>(length)); + string.impl()->ref(); + return JSC::JSValue::encode(JSC::jsString(globalObject->vm(), string.impl())); }
\ No newline at end of file diff --git a/src/javascript/jsc/bindings/wtf-bindings.h b/src/javascript/jsc/bindings/wtf-bindings.h index 3f71ff0c2..2abd398fe 100644 --- a/src/javascript/jsc/bindings/wtf-bindings.h +++ b/src/javascript/jsc/bindings/wtf-bindings.h @@ -3,4 +3,5 @@ #include "root.h" #include "wtf/text/ASCIIFastPath.h" -extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length);
\ No newline at end of file +extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length); +extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject);
\ No newline at end of file diff --git a/src/javascript/jsc/webcore/encoding.zig b/src/javascript/jsc/webcore/encoding.zig index 78d3f2877..b4ac59fa8 100644 --- a/src/javascript/jsc/webcore/encoding.zig +++ b/src/javascript/jsc/webcore/encoding.zig @@ -688,11 +688,99 @@ pub const Encoder = struct { return writeU8(input, len, to, to_len, .ascii); } + export fn Bun__encoding__toStringUTF16(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, JSC.Node.Encoding.utf16le); + } + export fn Bun__encoding__toStringUTF8(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .utf8); + } + export fn Bun__encoding__toStringASCII(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .ascii); + } + export fn Bun__encoding__toStringLatin1(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .latin1); + } + + export fn Bun__encoding__toStringHex(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .hex); + } + + export fn Bun__encoding__toStringBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64); + } + + export fn Bun__encoding__toStringURLSafeBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64url); + } + // pub fn writeUTF16AsUTF8(utf16: [*]const u16, len: usize, to: [*]u8, to_len: usize) callconv(.C) i32 { // return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, utf16[0..len]).written); // } - // pub fn toString(input: [*]const u8, len: usize, zig_str: *ZigString, comptime encoding: JSC.Node.Encoding) callconv(.C) i64 {} + pub fn toString(input_ptr: [*]const u8, len: usize, global: *JSGlobalObject, comptime encoding: JSC.Node.Encoding) JSValue { + if (len == 0) + return ZigString.Empty.toValue(global); + + const input = input_ptr[0..len]; + const allocator = VirtualMachine.vm.allocator; + + switch (comptime encoding) { + .latin1, .ascii => { + var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); + + @memcpy(to.ptr, input_ptr, to.len); + + // Hoping this gets auto vectorized + for (to[0..to.len]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return ZigString.init(to).toExternalValue(global); + }, + .buffer, .utf8 => { + // JSC only supports UTF-16 strings for non-ascii text + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + // If we get here, it means we can safely assume the string is 100% ASCII characters + // For this, we rely on the GC to manage the memory to minimize potential for memory leaks + return ZigString.init(input).toValueGC(global); + }, + // potentially convert UTF-16 to UTF-8 + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + var output = allocator.alloc(u8, input.len) catch return ZigString.init("Out of memory").toErrorInstance(global); + JSC.WTF.copyLCharsFromUCharSource(output.ptr, []align(1) const u16, @ptrCast([*]align(1) const u16, input.ptr)[0 .. input.len / 2]); + return ZigString.init(output).toExternalValue(global); + }, + + JSC.Node.Encoding.hex => { + var output = allocator.alloc(u8, input.len * 2) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = strings.encodeBytesToHex(output, input); + std.debug.assert(wrote == output.len); + var val = ZigString.init(output); + val.mark(); + return val.toExternalValue(global); + }, + + JSC.Node.Encoding.base64url => { + return JSC.WTF.toBase64URLStringValue(input, global); + }, + + JSC.Node.Encoding.base64 => { + const to_len = bun.base64.encodeLen(input); + var to = allocator.alloc(u8, to_len) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = bun.base64.encode(to, input); + return ZigString.init(to[0..wrote]).toExternalValue(global); + }, + } + } pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { if (len == 0 or to_len == 0) @@ -706,14 +794,26 @@ pub const Encoder = struct { // if (comptime encoding.isBinaryToText()) {} switch (comptime encoding) { - JSC.Node.Encoding.ascii => { - const written = @truncate(u32, @minimum(len, to_len)); + JSC.Node.Encoding.buffer => { + const written = @minimum(len, to_len); @memcpy(to, input, written); - return @intCast(i32, written); + + return @intCast(i64, written); + }, + .latin1, .ascii => { + const written = @minimum(len, to_len); + @memcpy(to, input, written); + + // Hoping this gets auto vectorized + for (to[0..written]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return @intCast(i64, written); }, .utf8 => { // need to encode - return @intCast(i32, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); + return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); }, // encode latin1 into UTF16 JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { @@ -762,7 +862,7 @@ pub const Encoder = struct { return @intCast(i64, bun.base64.decode(to[0..outlen], slice).written); }, - else => return 0, + // else => return 0, } } @@ -780,7 +880,7 @@ pub const Encoder = struct { return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written); }, // string is already encoded, just need to copy the data - JSC.Node.Encoding.ucs2, JSC.Node.Encoding.ascii, JSC.Node.Encoding.utf16le => { + JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => { strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]); return @intCast(i64, @minimum(len, to_len)); @@ -868,6 +968,14 @@ pub const Encoder = struct { _ = Bun__encoding__writeUTF16AsUTF8; _ = Bun__encoding__writeLatin1AsASCII; _ = Bun__encoding__writeUTF16AsASCII; + + _ = Bun__encoding__toStringUTF16; + _ = Bun__encoding__toStringUTF8; + _ = Bun__encoding__toStringASCII; + _ = Bun__encoding__toStringLatin1; + _ = Bun__encoding__toStringHex; + _ = Bun__encoding__toStringBase64; + _ = Bun__encoding__toStringURLSafeBase64; } } }; |