aboutsummaryrefslogtreecommitdiff
path: root/src/javascript
diff options
context:
space:
mode:
author Jarred Sumner <jarred@jarredsumner.com> 2022-04-26 01:12:28 -0700
committer Jarred Sumner <jarred@jarredsumner.com> 2022-04-26 01:12:28 -0700
commit 77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d (patch)
tree df930fa1bb1e7eb408762e8138aec580fed4e4e8 /src/javascript
parent 6590d1f8bf09838e4530848af74990c5ba89eb81 (diff)
download bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.gz
bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.zst
bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.zip
Most of Buffer.toString
Diffstat (limited to 'src/javascript')
-rw-r--r-- src/javascript/jsc/bindings/Buffer.h | 8
-rw-r--r-- src/javascript/jsc/bindings/JSBuffer.cpp | 102
-rw-r--r-- src/javascript/jsc/bindings/bindings.zig | 7
-rw-r--r-- src/javascript/jsc/bindings/headers-handwritten.h | 1
-rw-r--r-- src/javascript/jsc/bindings/wtf-bindings.cpp | 8
-rw-r--r-- src/javascript/jsc/bindings/wtf-bindings.h | 3
-rw-r--r-- src/javascript/jsc/webcore/encoding.zig | 122
7 files changed, 242 insertions, 9 deletions
diff --git a/src/javascript/jsc/bindings/Buffer.h b/src/javascript/jsc/bindings/Buffer.h
index ac57e4d25..8efc95787 100644
--- a/src/javascript/jsc/bindings/Buffer.h
+++ b/src/javascript/jsc/bindings/Buffer.h
@@ -10,6 +10,14 @@
#include "BufferEncodingType.h"
#include "JavaScriptCore/GenericTypedArrayView.h"
+extern "C" JSC__JSValue Bun__encoding__toStringUTF16(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringUTF8(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringASCII(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringLatin1(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringHex(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+extern "C" JSC__JSValue Bun__encoding__toStringURLSafeBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject);
+
namespace WebCore {
class Buffer final : public RefCounted<Buffer> {
diff --git a/src/javascript/jsc/bindings/JSBuffer.cpp b/src/javascript/jsc/bindings/JSBuffer.cpp
index f45fd2a9a..05cc1f23a 100644
--- a/src/javascript/jsc/bindings/JSBuffer.cpp
+++ b/src/javascript/jsc/bindings/JSBuffer.cpp
@@ -568,7 +568,107 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGl
static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{
auto& vm = JSC::getVM(lexicalGlobalObject);
- return JSC::JSValue::encode(jsUndefined());
+    // Implements Buffer.prototype.toString([encoding[, start[, end]]]).
+    //
+    // - encoding: optional; `undefined` or absent selects utf8. An unknown
+    //   encoding name throws a TypeError.
+    // - start:    optional byte offset; negative values throw a TypeError.
+    // - end:      optional exclusive byte offset; clamped to [0, buffer length].
+    //
+    // Returns the decoded JS string, or an empty string when the selected range
+    // is empty. If the decoder hands back a non-string value (it returns an
+    // error instance on allocation failure), that value is thrown.
+    WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8;
+    uint32_t offset = 0;
+
+    if (castedThis->length() == 0)
+        return JSC::JSValue::encode(JSC::jsEmptyString(vm));
+
+    auto scope = DECLARE_THROW_SCOPE(vm);
+    const size_t argumentCount = callFrame->argumentCount();
+
+    // Arguments are positional, so each one is examined whenever it is
+    // present, regardless of how many extra arguments follow it.
+    if (argumentCount > 0) {
+        JSC::JSValue encodingValue = callFrame->uncheckedArgument(0);
+        if (!encodingValue.isUndefined()) {
+            std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, encodingValue);
+            // Converting the argument to a string may itself throw.
+            RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
+            if (!encoded) {
+                throwTypeError(lexicalGlobalObject, scope, "Invalid encoding");
+                return JSC::JSValue::encode(jsUndefined());
+            }
+            encoding = encoded.value();
+        }
+    }
+
+    if (argumentCount > 1) {
+        int32_t ioffset = callFrame->uncheckedArgument(1).toInt32(lexicalGlobalObject);
+        RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
+        if (ioffset < 0) {
+            throwTypeError(lexicalGlobalObject, scope, "Offset must be a positive integer");
+            return JSC::JSValue::encode(jsUndefined());
+        }
+        offset = static_cast<uint32_t>(ioffset);
+    }
+
+    std::optional<double> requestedEnd;
+    if (argumentCount > 2) {
+        JSC::JSValue endValue = callFrame->uncheckedArgument(2);
+        if (!endValue.isUndefined()) {
+            requestedEnd = endValue.toNumber(lexicalGlobalObject);
+            RETURN_IF_EXCEPTION(scope, JSC::JSValue::encode(jsUndefined()));
+        }
+    }
+
+    // Read the length only after every argument has been coerced: coercion can
+    // run user code (valueOf / toString), so an earlier reading could be stale.
+    const uint32_t byteLength = castedThis->length();
+    uint32_t end = byteLength;
+    if (requestedEnd) {
+        // Never let `end` exceed the buffer: the decoders below read
+        // `end - offset` bytes straight from the backing store.
+        if (!(*requestedEnd > 0)) // also catches NaN
+            end = 0;
+        else if (*requestedEnd < static_cast<double>(byteLength))
+            end = static_cast<uint32_t>(*requestedEnd);
+    }
+
+    if (UNLIKELY(end <= offset)) {
+        RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm)));
+    }
+
+    const uint32_t length = end - offset;
+    JSC::EncodedJSValue ret = 0;
+
+    switch (encoding) {
+    case WebCore::BufferEncodingType::buffer:
+    case WebCore::BufferEncodingType::utf8: {
+        ret = Bun__encoding__toStringUTF8(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::latin1: {
+        ret = Bun__encoding__toStringLatin1(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::ascii: {
+        ret = Bun__encoding__toStringASCII(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::ucs2:
+    case WebCore::BufferEncodingType::utf16le: {
+        ret = Bun__encoding__toStringUTF16(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::base64: {
+        ret = Bun__encoding__toStringBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::base64url: {
+        ret = Bun__encoding__toStringURLSafeBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+
+    case WebCore::BufferEncodingType::hex: {
+        ret = Bun__encoding__toStringHex(castedThis->typedVector() + offset, length, lexicalGlobalObject);
+        break;
+    }
+    default: {
+        // Return right away: falling through would throw a second time with
+        // the still-empty `ret`.
+        throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen");
+        return JSC::JSValue::encode(jsUndefined());
+    }
+    }
+
+    JSC::JSValue retValue = JSC::JSValue::decode(ret);
+    if (UNLIKELY(!retValue.isString())) {
+        scope.throwException(lexicalGlobalObject, retValue);
+        return JSC::JSValue::encode(jsUndefined());
+    }
+
+    RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue));
}
static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{
diff --git a/src/javascript/jsc/bindings/bindings.zig b/src/javascript/jsc/bindings/bindings.zig
index f09d56e3e..2643abfed 100644
--- a/src/javascript/jsc/bindings/bindings.zig
+++ b/src/javascript/jsc/bindings/bindings.zig
@@ -3463,6 +3463,7 @@ pub const StringView = extern struct {
pub const WTF = struct {
extern fn WTF__copyLCharsFromUCharSource(dest: [*]u8, source: *const anyopaque, len: usize) void;
+ extern fn WTF__toBase64URLStringValue(bytes: [*]const u8, length: usize, globalObject: *JSGlobalObject) JSValue;
/// This uses SSE2 instructions and/or ARM NEON to copy 16-bit characters efficiently
/// See wtf/Text/ASCIIFastPath.h for details
@@ -3472,6 +3473,12 @@ pub const WTF = struct {
// This is any alignment
WTF__copyLCharsFromUCharSource(destination, source.ptr, source.len);
}
+
+ /// Encode `bytes` as URL-safe base64 and return the result as a JavaScript string value.
+ /// The string is built by the extern `WTF__toBase64URLStringValue`, so its memory is managed by JavaScriptCore instead of us
+ pub fn toBase64URLStringValue(bytes: []const u8, globalObject: *JSGlobalObject) JSValue {
+ return WTF__toBase64URLStringValue(bytes.ptr, bytes.len, globalObject);
+ }
};
pub const Callback = struct {
diff --git a/src/javascript/jsc/bindings/headers-handwritten.h b/src/javascript/jsc/bindings/headers-handwritten.h
index d955e3138..3d03187b7 100644
--- a/src/javascript/jsc/bindings/headers-handwritten.h
+++ b/src/javascript/jsc/bindings/headers-handwritten.h
@@ -217,4 +217,5 @@ extern "C" int64_t Bun__encoding__writeLatin1AsUTF8(const unsigned char* ptr, si
extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
+
#endif
diff --git a/src/javascript/jsc/bindings/wtf-bindings.cpp b/src/javascript/jsc/bindings/wtf-bindings.cpp
index 5dae85930..bc3b96f73 100644
--- a/src/javascript/jsc/bindings/wtf-bindings.cpp
+++ b/src/javascript/jsc/bindings/wtf-bindings.cpp
@@ -1,6 +1,14 @@
#include "wtf-bindings.h"
+#include "wtf/text/Base64.h"
extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length)
{
WTF::copyLCharsFromUCharSource(destination, source, length);
+}
+
+// Encodes `length` bytes starting at `bytes` as URL-safe base64 and returns the
+// result as a JavaScript string created in `globalObject`'s VM.
+extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject)
+{
+    WTF::String string = WTF::base64URLEncodeToString(reinterpret_cast<const LChar*>(bytes), static_cast<unsigned int>(length));
+    // jsString() takes its own reference to the underlying StringImpl, so no
+    // manual ref() is needed here; an unbalanced ref() would keep the impl
+    // alive forever.
+    return JSC::JSValue::encode(JSC::jsString(globalObject->vm(), string));
} \ No newline at end of file
diff --git a/src/javascript/jsc/bindings/wtf-bindings.h b/src/javascript/jsc/bindings/wtf-bindings.h
index 3f71ff0c2..2abd398fe 100644
--- a/src/javascript/jsc/bindings/wtf-bindings.h
+++ b/src/javascript/jsc/bindings/wtf-bindings.h
@@ -3,4 +3,5 @@
#include "root.h"
#include "wtf/text/ASCIIFastPath.h"
-extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length); \ No newline at end of file
+extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length);
+extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject); \ No newline at end of file
diff --git a/src/javascript/jsc/webcore/encoding.zig b/src/javascript/jsc/webcore/encoding.zig
index 78d3f2877..b4ac59fa8 100644
--- a/src/javascript/jsc/webcore/encoding.zig
+++ b/src/javascript/jsc/webcore/encoding.zig
@@ -688,11 +688,99 @@ pub const Encoder = struct {
return writeU8(input, len, to, to_len, .ascii);
}
+    /// Exported entry point: decode `len` bytes at `input` as UTF-16LE into a JS string.
+    export fn Bun__encoding__toStringUTF16(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .utf16le);
+    }
+
+    /// Exported entry point: decode `len` bytes at `input` as UTF-8 into a JS string.
+    export fn Bun__encoding__toStringUTF8(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .utf8);
+    }
+
+    /// Exported entry point: decode `len` bytes at `input` as ASCII into a JS string.
+    export fn Bun__encoding__toStringASCII(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .ascii);
+    }
+
+    /// Exported entry point: decode `len` bytes at `input` as Latin-1 into a JS string.
+    export fn Bun__encoding__toStringLatin1(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .latin1);
+    }
+
+    /// Exported entry point: render `len` bytes at `input` as a hex JS string.
+    export fn Bun__encoding__toStringHex(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .hex);
+    }
+
+    /// Exported entry point: render `len` bytes at `input` as a base64 JS string.
+    export fn Bun__encoding__toStringBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .base64);
+    }
+
+    /// Exported entry point: render `len` bytes at `input` as a URL-safe base64 JS string.
+    export fn Bun__encoding__toStringURLSafeBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue {
+        return toString(input, len, globalObject, .base64url);
+    }
+
// pub fn writeUTF16AsUTF8(utf16: [*]const u16, len: usize, to: [*]u8, to_len: usize) callconv(.C) i32 {
// return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, utf16[0..len]).written);
// }
- // pub fn toString(input: [*]const u8, len: usize, zig_str: *ZigString, comptime encoding: JSC.Node.Encoding) callconv(.C) i64 {}
+    /// Turn `len` bytes starting at `input_ptr` into a JavaScript string value,
+    /// interpreting (or rendering) them according to `encoding`.
+    ///
+    /// Returns the empty string for `len == 0`. When an allocation fails the
+    /// return value is an "Out of memory" error instance rather than a string.
+    pub fn toString(input_ptr: [*]const u8, len: usize, global: *JSGlobalObject, comptime encoding: JSC.Node.Encoding) JSValue {
+        if (len == 0)
+            return ZigString.Empty.toValue(global);
+
+        const input = input_ptr[0..len];
+        const allocator = VirtualMachine.vm.allocator;
+
+        switch (comptime encoding) {
+            .latin1 => {
+                // Latin-1 bytes map one-to-one onto U+0000..U+00FF, so they are
+                // copied verbatim; masking the high bit would corrupt 0x80..0xFF.
+                var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global);
+                @memcpy(to.ptr, input_ptr, to.len);
+                return ZigString.init(to).toExternalValue(global);
+            },
+            .ascii => {
+                var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global);
+
+                // ASCII keeps only the low 7 bits of every byte.
+                // Hoping this gets auto vectorized
+                for (input) |c, i| {
+                    to[i] = @as(u8, @truncate(u7, c));
+                }
+
+                return ZigString.init(to).toExternalValue(global);
+            },
+            .buffer, .utf8 => {
+                // JSC only supports UTF-16 strings for non-ascii text
+                const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global);
+                if (converted) |utf16| {
+                    return ZigString.toExternalU16(utf16.ptr, utf16.len, global);
+                }
+
+                // If we get here, it means we can safely assume the string is 100% ASCII characters
+                // For this, we rely on the GC to manage the memory to minimize potential for memory leaks
+                return ZigString.init(input).toValueGC(global);
+            },
+            JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
+                // The input already is UTF-16LE: assemble each little-endian byte
+                // pair into one code unit. It must not go through the UTF-8 decoder.
+                // A trailing odd byte cannot form a code unit and is ignored.
+                const code_units = len / 2;
+                if (code_units == 0)
+                    return ZigString.Empty.toValue(global);
+
+                var output = allocator.alloc(u16, code_units) catch return ZigString.init("Out of memory").toErrorInstance(global);
+                var i: usize = 0;
+                while (i < code_units) : (i += 1) {
+                    // Byte-wise assembly works for any alignment of `input`.
+                    output[i] = @as(u16, input[i * 2]) | (@as(u16, input[i * 2 + 1]) << 8);
+                }
+                return ZigString.toExternalU16(output.ptr, output.len, global);
+            },
+
+            JSC.Node.Encoding.hex => {
+                // Two hex digits per input byte.
+                var output = allocator.alloc(u8, input.len * 2) catch return ZigString.init("Out of memory").toErrorInstance(global);
+                const wrote = strings.encodeBytesToHex(output, input);
+                std.debug.assert(wrote == output.len);
+                var val = ZigString.init(output);
+                val.mark();
+                return val.toExternalValue(global);
+            },
+
+            JSC.Node.Encoding.base64url => {
+                return JSC.WTF.toBase64URLStringValue(input, global);
+            },
+
+            JSC.Node.Encoding.base64 => {
+                const to_len = bun.base64.encodeLen(input);
+                var to = allocator.alloc(u8, to_len) catch return ZigString.init("Out of memory").toErrorInstance(global);
+                const wrote = bun.base64.encode(to, input);
+                // Only the bytes actually written are part of the result.
+                return ZigString.init(to[0..wrote]).toExternalValue(global);
+            },
+        }
+    }
pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
if (len == 0 or to_len == 0)
@@ -706,14 +794,26 @@ pub const Encoder = struct {
// if (comptime encoding.isBinaryToText()) {}
switch (comptime encoding) {
- JSC.Node.Encoding.ascii => {
- const written = @truncate(u32, @minimum(len, to_len));
+ JSC.Node.Encoding.buffer => {
+ const written = @minimum(len, to_len);
@memcpy(to, input, written);
- return @intCast(i32, written);
+
+ return @intCast(i64, written);
+ },
+ .latin1, .ascii => {
+ const written = @minimum(len, to_len);
+ @memcpy(to, input, written);
+
+ // Hoping this gets auto vectorized
+ for (to[0..written]) |c, i| {
+ to[i] = @as(u8, @truncate(u7, c));
+ }
+
+ return @intCast(i64, written);
},
.utf8 => {
// need to encode
- return @intCast(i32, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written);
+ return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written);
},
// encode latin1 into UTF16
JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
@@ -762,7 +862,7 @@ pub const Encoder = struct {
return @intCast(i64, bun.base64.decode(to[0..outlen], slice).written);
},
- else => return 0,
+ // else => return 0,
}
}
@@ -780,7 +880,7 @@ pub const Encoder = struct {
return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written);
},
// string is already encoded, just need to copy the data
- JSC.Node.Encoding.ucs2, JSC.Node.Encoding.ascii, JSC.Node.Encoding.utf16le => {
+ JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => {
strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]);
return @intCast(i64, @minimum(len, to_len));
@@ -868,6 +968,14 @@ pub const Encoder = struct {
_ = Bun__encoding__writeUTF16AsUTF8;
_ = Bun__encoding__writeLatin1AsASCII;
_ = Bun__encoding__writeUTF16AsASCII;
+
+ _ = Bun__encoding__toStringUTF16;
+ _ = Bun__encoding__toStringUTF8;
+ _ = Bun__encoding__toStringASCII;
+ _ = Bun__encoding__toStringLatin1;
+ _ = Bun__encoding__toStringHex;
+ _ = Bun__encoding__toStringBase64;
+ _ = Bun__encoding__toStringURLSafeBase64;
}
}
};