diff options
author | 2023-02-05 07:07:35 -0800 | |
---|---|---|
committer | 2023-02-05 07:07:35 -0800 | |
commit | ca2e708be11e9481938965ee92acc38c01a7726a (patch) | |
tree | 5af19d1f7de4807d333334f804d286f230612164 /src/bun.js | |
parent | 864913684558466f8517de9d474180a6bc19915c (diff) | |
download | bun-ca2e708be11e9481938965ee92acc38c01a7726a.tar.gz bun-ca2e708be11e9481938965ee92acc38c01a7726a.tar.zst bun-ca2e708be11e9481938965ee92acc38c01a7726a.zip |
speed it up a little more
Diffstat (limited to 'src/bun.js')
-rw-r--r-- | src/bun.js/bindings/JSBuffer.cpp | 141 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 72 |
2 files changed, 117 insertions, 96 deletions
diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp index a6c735af7..2ce07617a 100644 --- a/src/bun.js/bindings/JSBuffer.cpp +++ b/src/bun.js/bindings/JSBuffer.cpp @@ -1350,6 +1350,73 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGl return JSC::JSValue::encode(castedThis); } +static inline JSC::EncodedJSValue jsBufferToString(JSC::VM& vm, JSC::JSGlobalObject* lexicalGlobalObject, JSC::JSUint8Array* castedThis, size_t offset, size_t length, WebCore::BufferEncodingType encoding) +{ + auto scope = DECLARE_THROW_SCOPE(vm); + + if (UNLIKELY(length == 0)) { + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm))); + } + + JSC::EncodedJSValue ret = 0; + + switch (encoding) { + case WebCore::BufferEncodingType::latin1: { + LChar* data = nullptr; + auto str = String::createUninitialized(length, data); + memcpy(data, reinterpret_cast<const char*>(castedThis->typedVector() + offset), length); + return JSC::JSValue::encode(JSC::jsString(vm, WTFMove(str))); + } + + case WebCore::BufferEncodingType::ucs2: + case WebCore::BufferEncodingType::utf16le: { + UChar* data = nullptr; + size_t u16length = length / 2; + if (u16length == 0) { + return JSC::JSValue::encode(JSC::jsEmptyString(vm)); + } else { + auto str = String::createUninitialized(u16length, data); + // always zero out the last byte of the string incase the buffer is not a multiple of 2 + data[u16length - 1] = 0; + memcpy(data, reinterpret_cast<const char*>(castedThis->typedVector() + offset), length); + return JSC::JSValue::encode(JSC::jsString(vm, WTFMove(str))); + } + + break; + } + + case WebCore::BufferEncodingType::ascii: { + // ascii: we always know the length + // so we might as well allocate upfront + LChar* data = nullptr; + auto str = String::createUninitialized(length, data); + Bun__encoding__writeLatin1(castedThis->typedVector() + offset, length, data, length, static_cast<uint8_t>(encoding)); + return JSC::JSValue::encode(JSC::jsString(vm, WTFMove(str))); + } + + case WebCore::BufferEncodingType::buffer: + case WebCore::BufferEncodingType::utf8: + case WebCore::BufferEncodingType::base64: + case WebCore::BufferEncodingType::base64url: + case WebCore::BufferEncodingType::hex: { + ret = Bun__encoding__toString(castedThis->typedVector() + offset, length, lexicalGlobalObject, static_cast<uint8_t>(encoding)); + break; + } + default: { + throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen"_s); + break; + } + } + + JSC::JSValue retValue = JSC::JSValue::decode(ret); + if (UNLIKELY(!retValue.isString())) { + scope.throwException(lexicalGlobalObject, retValue); + return JSC::JSValue::encode(jsUndefined()); + } + + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue)); +} + static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSArrayBufferView>::ClassParameter castedThis) { auto& vm = JSC::getVM(lexicalGlobalObject); @@ -1361,8 +1428,6 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JS if (length == 0) return JSC::JSValue::encode(JSC::jsEmptyString(vm)); - auto scope = DECLARE_THROW_SCOPE(vm); - switch (callFrame->argumentCount()) { case 0: { break; @@ -1374,6 +1439,8 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JS if (arg1.value().isString()) { std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1.value()); if (!encoded) { + auto scope = DECLARE_THROW_SCOPE(vm); + throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s); return JSC::JSValue::encode(jsUndefined()); } @@ -1389,6 +1456,8 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JS JSC::JSValue arg2 = callFrame->uncheckedArgument(1); int32_t ioffset = arg2.toInt32(lexicalGlobalObject); if (ioffset < 0) { + auto scope = DECLARE_THROW_SCOPE(vm); + throwTypeError(lexicalGlobalObject, scope, "Offset must be a positive integer"_s); return JSC::JSValue::encode(jsUndefined()); } @@ -1406,61 +1475,33 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JS length -= std::min(offset, length); - if (UNLIKELY(length == 0)) { - RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm))); - } + return jsBufferToString(vm, lexicalGlobalObject, castedThis, offset, length, encoding); +} - JSC::EncodedJSValue ret = 0; +// DOMJIT makes it slower! TODO: investigate why +// JSC_DECLARE_JIT_OPERATION_WITHOUT_WTF_INTERNAL(jsBufferPrototypeToStringWithoutTypeChecks, JSValue, (JSC::JSGlobalObject * lexicalGlobalObject, JSC::JSUint8Array* thisValue, JSC::JSString* encodingValue)); - switch (encoding) { - case WebCore::BufferEncodingType::latin1: { - LChar* data = nullptr; - auto str = String::createUninitialized(length, data); - memcpy(data, reinterpret_cast<const char*>(castedThis->typedVector() + offset), length); - ret = JSC::JSValue::encode(JSC::jsString(vm, WTFMove(str))); - break; - } +// JSC_DEFINE_JIT_OPERATION(jsBufferPrototypeToStringWithoutTypeChecks, JSValue, (JSC::JSGlobalObject * lexicalGlobalObject, JSUint8Array* thisValue, JSString* encodingValue)) +// { +// VM& vm = JSC::getVM(lexicalGlobalObject); +// IGNORE_WARNINGS_BEGIN("frame-address") +// CallFrame* callFrame = DECLARE_CALL_FRAME(vm); +// IGNORE_WARNINGS_END +// JSC::JITOperationPrologueCallFrameTracer tracer(vm, callFrame); - case WebCore::BufferEncodingType::ucs2: - case WebCore::BufferEncodingType::utf16le: { - UChar* data = nullptr; - size_t u16length = length / 2; - if (u16length == 0) { - ret = JSC::JSValue::encode(JSC::jsEmptyString(vm)); - } else { - auto str = String::createUninitialized(u16length, data); - // always zero out the last byte of the string incase the buffer is not a multiple of 2 - data[u16length - 1] = 0; - memcpy(data, reinterpret_cast<const char*>(castedThis->typedVector() + offset), length); - ret = JSC::JSValue::encode(JSC::jsString(vm, WTFMove(str))); - } +// std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, encodingValue); +// if (!encoded) { +// auto scope = DECLARE_THROW_SCOPE(vm); - break; - } +// throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s); +// return {}; +// } - case WebCore::BufferEncodingType::buffer: - case WebCore::BufferEncodingType::utf8: - case WebCore::BufferEncodingType::ascii: - case WebCore::BufferEncodingType::base64: - case WebCore::BufferEncodingType::base64url: - case WebCore::BufferEncodingType::hex: { - ret = Bun__encoding__toString(castedThis->typedVector() + offset, length, lexicalGlobalObject, static_cast<uint8_t>(encoding)); - break; - } - default: { - throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen"_s); - break; - } - } +// auto encoding = encoded.value(); - JSC::JSValue retValue = JSC::JSValue::decode(ret); - if (UNLIKELY(!retValue.isString())) { - scope.throwException(lexicalGlobalObject, retValue); - return JSC::JSValue::encode(jsUndefined()); - } +// return JSValue::decode(jsBufferToString(vm, lexicalGlobalObject, thisValue, 0, thisValue->byteLength(), encoding)); +// } - RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue)); -} static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSArrayBufferView>::ClassParameter castedThis) { auto& vm = JSC::getVM(lexicalGlobalObject); diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 3172966ef..8a6e3224b 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -795,43 +795,20 @@ pub const Encoder = struct { switch (comptime encoding) { .ascii => { - var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); - var complete = to; - var remain = input; - - if (comptime bun.Environment.enableSIMD) { - const vector_size = 16; - // https://zig.godbolt.org/z/qezsY8T3W - var remain_in_u64 = remain[0 .. remain.len - (remain.len % vector_size)]; - var to_in_u64 = to[0 .. to.len - (to.len % vector_size)]; - var remain_as_u64 = std.mem.bytesAsSlice(u64, remain_in_u64); - var to_as_u64 = std.mem.bytesAsSlice(u64, to_in_u64); - const inner_vector_size = vector_size / 8; - const end_vector_len = @min(remain_as_u64.len, to_as_u64.len); - remain_as_u64 = remain_as_u64[0..end_vector_len]; - to_as_u64 = to_as_u64[0..end_vector_len]; - const end_ptr = remain_as_u64.ptr + remain_as_u64.len; - // using the pointer instead of the length is super important for the codegen - while (end_ptr != remain_as_u64.ptr) { - const buf = @as(@Vector(inner_vector_size, u64), remain_as_u64[0..inner_vector_size].*); - const mask = @splat(inner_vector_size, @as(u64, 0x7f7f7f7f7f7f7f7f)); - to_as_u64[0..inner_vector_size].* = buf & mask; - - remain_as_u64 = remain_as_u64[inner_vector_size..]; - to_as_u64 = to_as_u64[inner_vector_size..]; - } - remain = remain[remain_in_u64.len..]; - to = to[to_in_u64.len..]; + if (bun.simdutf.validate.ascii(input)) { + return ZigString.init(input).toValueGC(global); } - const end_ptr = to.ptr + to.len; - while (to.ptr != end_ptr) { - to[0] = @as(u8, @truncate(u7, remain[0])); - to = to[1..]; - remain = remain[1..]; + if (input.len < 512) { + var buf: [512]u8 = undefined; + var to = buf[0..input.len]; + strings.copyLatin1IntoASCII(to, input); + return ZigString.init(to).toValueGC(global); } - return ZigString.init(complete).toExternalValue(global); + var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); + strings.copyLatin1IntoASCII(to, input); + return ZigString.init(to).toExternalValue(global); }, .latin1 => { var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); @@ -884,7 +861,7 @@ pub const Encoder = struct { } } - pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + pub fn writeU8(input: [*]const u8, len: usize, to_ptr: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { if (len == 0 or to_len == 0) return 0; @@ -898,39 +875,42 @@ pub const Encoder = struct { switch (comptime encoding) { JSC.Node.Encoding.buffer => { const written = @min(len, to_len); - @memcpy(to, input, written); + @memcpy(to_ptr, input, written); return @intCast(i64, written); }, .latin1, .ascii => { const written = @min(len, to_len); - @memcpy(to, input, written); - // Hoping this gets auto vectorized - for (to[0..written]) |c, i| { - to[i] = @as(u8, @truncate(u7, c)); + var to = to_ptr[0..written]; + var remain = input[0..written]; + + if (bun.simdutf.validate.ascii(remain)) { + @memcpy(to.ptr, remain.ptr, written); + } else { + strings.copyLatin1IntoASCII(to, remain); } return @intCast(i64, written); }, .utf8 => { // need to encode - return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); + return @intCast(i64, strings.copyLatin1IntoUTF8(to_ptr[0..to_len], []const u8, input[0..len]).written); }, // encode latin1 into UTF16 JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { if (to_len < 2) return 0; - if (std.mem.isAligned(@ptrToInt(to), @alignOf([*]u16))) { + if (std.mem.isAligned(@ptrToInt(to_ptr), @alignOf([*]u16))) { var buf = input[0..len]; - var output = @ptrCast([*]u16, @alignCast(@alignOf(u16), to))[0 .. to_len / 2]; + var output = @ptrCast([*]u16, @alignCast(@alignOf(u16), to_ptr))[0 .. to_len / 2]; var written = strings.copyLatin1IntoUTF16([]u16, output, []const u8, buf).written; return written * 2; } else { var buf = input[0..len]; - var output = @ptrCast([*]align(1) u16, to)[0 .. to_len / 2]; + var output = @ptrCast([*]align(1) u16, to_ptr)[0 .. to_len / 2]; var written = strings.copyLatin1IntoUTF16([]align(1) u16, output, []const u8, buf).written; return written * 2; @@ -938,7 +918,7 @@ pub const Encoder = struct { }, JSC.Node.Encoding.hex => { - return @intCast(i64, strings.decodeHexToBytes(to[0..to_len], u8, input[0..len])); + return @intCast(i64, strings.decodeHexToBytes(to_ptr[0..to_len], u8, input[0..len])); }, JSC.Node.Encoding.base64url => { @@ -952,12 +932,12 @@ pub const Encoder = struct { slice = slice[0 .. slice.len - 1]; } - const wrote = bun.base64.decodeURLSafe(to[0..to_len], slice).written; + const wrote = bun.base64.decodeURLSafe(to_ptr[0..to_len], slice).written; return @intCast(i64, wrote); }, JSC.Node.Encoding.base64 => { - return @intCast(i64, bun.base64.decode(to[0..to_len], input[0..len]).written); + return @intCast(i64, bun.base64.decode(to_ptr[0..to_len], input[0..len]).written); }, // else => return 0, } |