diff options
author | 2022-05-10 19:09:28 -0700 | |
---|---|---|
committer | 2022-05-10 19:09:28 -0700 | |
commit | 8def37c14e59b381d7165cd2bd79e8499e52ecf1 (patch) | |
tree | d67a766833c119f2e34234517351f5fd3f5ada99 | |
parent | b3760cd72349eb743c5dcfd679f3b64612d6b7ff (diff) | |
download | bun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.tar.gz bun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.tar.zst bun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.zip |
[bun.js] Implement `Buffer.byteLength`
-rw-r--r-- | src/javascript/jsc/bindings/JSBuffer.cpp | 103 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/headers-cpp.h | 2 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/headers-handwritten.h | 13 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/headers.h | 2 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/napi.cpp | 142 | ||||
-rw-r--r-- | src/javascript/jsc/webcore/encoding.zig | 110 | ||||
-rw-r--r-- | src/string_immutable.zig | 118 |
7 files changed, 443 insertions, 47 deletions
diff --git a/src/javascript/jsc/bindings/JSBuffer.cpp b/src/javascript/jsc/bindings/JSBuffer.cpp index e67e39f19..301663613 100644 --- a/src/javascript/jsc/bindings/JSBuffer.cpp +++ b/src/javascript/jsc/bindings/JSBuffer.cpp @@ -360,7 +360,108 @@ static inline JSC::EncodedJSValue jsBufferConstructorFunction_allocUnsafeSlowBod static inline JSC::EncodedJSValue jsBufferConstructorFunction_byteLengthBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) { auto& vm = JSC::getVM(lexicalGlobalObject); - return JSValue::encode(jsUndefined()); + + uint32_t offset = 0; + uint32_t length = castedThis->length(); + WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8; + + auto scope = DECLARE_THROW_SCOPE(vm); + + if (UNLIKELY(callFrame->argumentCount() == 0)) { + throwTypeError(lexicalGlobalObject, scope, "Not enough arguments"_s); + return JSC::JSValue::encode(jsUndefined()); + } + + EnsureStillAliveScope arg0 = callFrame->argument(0); + auto input = arg0.value(); + if (JSC::JSArrayBufferView* view = JSC::jsDynamicCast<JSC::JSArrayBufferView*>(input)) { + RELEASE_AND_RETURN(scope, JSValue::encode(JSC::jsNumber(view->byteLength()))); + } + auto* str = arg0.value().toStringOrNull(lexicalGlobalObject); + + if (!str) { + throwTypeError(lexicalGlobalObject, scope, "byteLength() expects a string"_s); + return JSC::JSValue::encode(jsUndefined()); + } + + EnsureStillAliveScope arg1 = callFrame->argument(1); + + if (str->length() == 0) + return JSC::JSValue::encode(JSC::jsNumber(0)); + + if (callFrame->argumentCount() > 1) { + if (arg1.value().isString()) { + std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1.value()); + if (!encoded) { + throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s); + return JSC::JSValue::encode(jsUndefined()); + } + + encoding = encoded.value(); + } + } + + auto view = str->tryGetValue(lexicalGlobalObject); + int64_t written = 0; + + switch (encoding) { + case WebCore::BufferEncodingType::utf8: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsUTF8(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsUTF8(view.characters16(), view.length()); + } + break; + } + + case WebCore::BufferEncodingType::latin1: + case WebCore::BufferEncodingType::ascii: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsASCII(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsASCII(view.characters16(), view.length()); + } + break; + } + case WebCore::BufferEncodingType::ucs2: + case WebCore::BufferEncodingType::utf16le: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsUTF16(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsUTF16(view.characters16(), view.length()); + } + break; + } + + case WebCore::BufferEncodingType::base64: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsBase64(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsBase64(view.characters16(), view.length()); + } + break; + } + + case WebCore::BufferEncodingType::base64url: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsURLSafeBase64(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsURLSafeBase64(view.characters16(), view.length()); + } + break; + } + + case WebCore::BufferEncodingType::hex: { + if (view.is8Bit()) { + written = Bun__encoding__byteLengthLatin1AsHex(view.characters8(), view.length()); + } else { + written = Bun__encoding__byteLengthUTF16AsHex(view.characters16(), view.length()); + } + break; + } + } + + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(written))); } static inline JSC::EncodedJSValue jsBufferConstructorFunction_compareBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) diff --git a/src/javascript/jsc/bindings/headers-cpp.h b/src/javascript/jsc/bindings/headers-cpp.h index 6b4c06ebe..08c528863 100644 --- a/src/javascript/jsc/bindings/headers-cpp.h +++ b/src/javascript/jsc/bindings/headers-cpp.h @@ -1,4 +1,4 @@ -//-- AUTOGENERATED FILE -- 1651982852 +//-- AUTOGENERATED FILE -- 1652089399 // clang-format off #pragma once diff --git a/src/javascript/jsc/bindings/headers-handwritten.h b/src/javascript/jsc/bindings/headers-handwritten.h index e285d5ffc..710e9290d 100644 --- a/src/javascript/jsc/bindings/headers-handwritten.h +++ b/src/javascript/jsc/bindings/headers-handwritten.h @@ -218,6 +218,19 @@ extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len, extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len); extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsHex(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsHex(const UChar* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsURLSafeBase64(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsURLSafeBase64(const UChar* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsBase64(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsBase64(const UChar* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF16(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF16(const UChar* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF8(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF8(const UChar* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthLatin1AsASCII(const unsigned char* ptr, size_t len); +extern "C" size_t Bun__encoding__byteLengthUTF16AsASCII(const UChar* ptr, size_t len); + extern "C" int64_t Bun__encoding__constructFromLatin1AsHex(void*, const unsigned char* ptr, size_t len); extern "C" int64_t Bun__encoding__constructFromUTF16AsHex(void*, const UChar* ptr, size_t len); extern "C" int64_t Bun__encoding__constructFromLatin1AsURLSafeBase64(void*, const unsigned char* ptr, size_t len); diff --git a/src/javascript/jsc/bindings/headers.h b/src/javascript/jsc/bindings/headers.h index 07a86385f..32de25cf5 100644 --- a/src/javascript/jsc/bindings/headers.h +++ b/src/javascript/jsc/bindings/headers.h @@ -1,5 +1,5 @@ // clang-format: off -//-- AUTOGENERATED FILE -- 1651982852 +//-- AUTOGENERATED FILE -- 1652089399 #pragma once #include <stddef.h> diff --git a/src/javascript/jsc/bindings/napi.cpp b/src/javascript/jsc/bindings/napi.cpp index 9a8a03405..3335cde9e 100644 --- a/src/javascript/jsc/bindings/napi.cpp +++ b/src/javascript/jsc/bindings/napi.cpp @@ -55,13 +55,12 @@ namespace Napi { JSC::SourceCode generateSourceCode(WTF::String keyString, JSC::VM& vm, JSC::JSObject* object, JSC::JSGlobalObject* globalObject) { - + JSC::gcProtect(object); JSC::JSArray* exportKeys = ownPropertyKeys(globalObject, object, PropertyNameMode::StringsAndSymbols, DontEnumPropertiesMode::Include, std::nullopt); - auto symbol = vm.symbolRegistry().symbolForKey("__BunTemporaryGlobal"_s); - JSC::Identifier ident = JSC::Identifier::fromUid(symbol); + JSC::Identifier ident = JSC::Identifier::fromString(vm, "__BunTemporaryGlobal"_s); WTF::StringBuilder sourceCodeBuilder = WTF::StringBuilder(); // TODO: handle symbol collision - sourceCodeBuilder.append("var $$TempSymbol = Symbol.for('__BunTemporaryGlobal'), $$NativeModule = globalThis[$$TempSymbol]; globalThis[$$TempSymbol] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s); + sourceCodeBuilder.append("\nvar $$NativeModule = globalThis['__BunTemporaryGlobal']; console.log($$NativeModule); globalThis['__BunTemporaryGlobal'] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s); for (unsigned i = 0; i < exportKeys->length(); i++) { auto key = exportKeys->getIndexQuickly(i); @@ -207,17 +206,32 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t } WTF::String nameStr; if (property.utf8name != nullptr) { - nameStr = WTF::String::fromUTF8(property.utf8name); + nameStr = WTF::String::fromUTF8(property.utf8name).isolatedCopy(); } else if (property.name) { - nameStr = toJS(property.name).toWTFString(globalObject); + nameStr = toJS(property.name).toWTFString(globalObject).isolatedCopy(); } auto propertyName = JSC::PropertyName(JSC::Identifier::fromString(vm, nameStr)); if (property.method) { - auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, reinterpret_cast<Zig::FFIFunction>(property.method)); - function->dataPtr = dataPtr; - JSC::JSValue value = JSC::JSValue(function); + JSC::JSValue value; + auto method = reinterpret_cast<Zig::FFIFunction>(property.method); + if (!dataPtr) { + JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create( + globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue { + JSC::MarkedArgumentBuffer values; + values.append(callFrame->thisValue()); + for (int i = 0; i < callFrame->argumentCount(); i++) { + values.append(callFrame->argument(i)); + } + return method(globalObject, callFrame); + }); + value = JSC::JSValue(func); + } else { + auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, method); + function->dataPtr = dataPtr; + value = JSC::JSValue(function); + } to->putDirect(vm, propertyName, value, getPropertyAttributes(property) | JSC::PropertyAttribute::Function); return; @@ -233,6 +247,8 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t if (getterProperty) { JSC::JSNativeStdFunction* getterFunction = JSC::JSNativeStdFunction::create( globalObject->vm(), globalObject, 0, String(), [getterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue { + JSC::MarkedArgumentBufferWithSize values; + values.append(callFrame->thisValue()); return getterProperty(globalObject, callFrame); }); getter = getterFunction; @@ -247,8 +263,10 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t if (setterProperty) { JSC::JSNativeStdFunction* setterFunction = JSC::JSNativeStdFunction::create( globalObject->vm(), globalObject, 1, String(), [setterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue { - setterProperty(globalObject, callFrame); - return JSC::JSValue::encode(JSC::jsBoolean(true)); + JSC::MarkedArgumentBufferWithSize values; + values.append(callFrame->thisValue()); + values.append(callFrame->uncheckedArgument(0)); + return setterProperty(globalObject, callFrame); }); setter = setterFunction; } else { @@ -500,9 +518,9 @@ extern "C" napi_status napi_wrap(napi_env env, extern "C" napi_status napi_unwrap(napi_env env, napi_value js_object, void** result) { - // if (!toJS(js_object).isObject()) { - // return NAPI_OBJECT_EXPECTED; - // } + if (!toJS(js_object).isObject()) { + return NAPI_OBJECT_EXPECTED; + } auto* globalObject = toJS(env); auto& vm = globalObject->vm(); auto* object = JSC::jsDynamicCast<NapiPrototype*>(toJS(js_object)); @@ -521,12 +539,27 @@ extern "C" napi_status napi_create_function(napi_env env, const char* utf8name, { Zig::GlobalObject* globalObject = toJS(env); JSC::VM& vm = globalObject->vm(); - auto name = WTF::String::fromUTF8(utf8name, length); + auto name = WTF::String::fromUTF8(utf8name, length == NAPI_AUTO_LENGTH ? strlen(utf8name) : length).isolatedCopy(); + auto method = reinterpret_cast<Zig::FFIFunction>(cb); + if (data) { + auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, method); + function->dataPtr = data; + *result = toNapi(JSC::JSValue(function)); + } else { + JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create( + globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue { + JSC::MarkedArgumentBuffer values; + values.append(callFrame->thisValue()); + for (int i = 0; i < callFrame->argumentCount(); i++) { + values.append(callFrame->argument(i)); + } + return method(globalObject, callFrame); + }); + *result = toNapi(JSC::JSValue(func)); + } + // std::cout << "napi_create_function: " << utf8name << std::endl; - auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, reinterpret_cast<Zig::FFIFunction>(cb)); - function->dataPtr = data; - JSC::JSValue functionValue = JSC::JSValue(function); - *reinterpret_cast<JSC::EncodedJSValue*>(result) = JSC::JSValue::encode(functionValue); + return napi_ok; } @@ -559,8 +592,9 @@ extern "C" napi_status napi_get_cb_info( } } + JSC::JSValue thisValue = callFrame->thisValue(); + if (this_arg != nullptr) { - JSC::JSValue thisValue = callFrame->thisValue(); *this_arg = toNapi(thisValue); } @@ -568,8 +602,14 @@ extern "C" napi_status napi_get_cb_info( JSC::JSValue callee = JSC::JSValue(callFrame->jsCallee()); if (Zig::JSFFIFunction* ffiFunction = JSC::jsDynamicCast<Zig::JSFFIFunction*>(callee)) { *data = reinterpret_cast<void*>(ffiFunction->dataPtr); - } else if (NapiPrototype* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) { + } else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) { *data = proto->napiRef ? proto->napiRef->data : nullptr; + } else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(callee)) { + *data = proto->dataPtr; + } else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(thisValue)) { + *data = proto->napiRef ? proto->napiRef->data : nullptr; + } else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(thisValue)) { + *data = proto->dataPtr; } else { *data = nullptr; } @@ -595,6 +635,8 @@ napi_define_properties(napi_env env, napi_value object, size_t property_count, void* inheritedDataPtr = nullptr; if (NapiPrototype* proto = jsDynamicCast<NapiPrototype*>(objectValue)) { inheritedDataPtr = proto->napiRef ? proto->napiRef->data : nullptr; + } else if (NapiClass* proto = jsDynamicCast<NapiClass*>(objectValue)) { + inheritedDataPtr = proto->dataPtr; } for (size_t i = 0; i < property_count; i++) { @@ -980,6 +1022,55 @@ static JSC_DEFINE_HOST_FUNCTION(NapiClass_ConstructorFunction, callFrame->setThisValue(prototype->subclass(newTarget)); napi->constructor()(globalObject, callFrame); + size_t count = callFrame->argumentCount(); + + switch (count) { + case 0: { + break; + } + case 1: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + break; + } + case 2: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + JSC::ensureStillAliveHere(callFrame->argument(1)); + break; + } + case 3: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + JSC::ensureStillAliveHere(callFrame->argument(1)); + JSC::ensureStillAliveHere(callFrame->argument(2)); + break; + } + case 4: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + JSC::ensureStillAliveHere(callFrame->argument(1)); + JSC::ensureStillAliveHere(callFrame->argument(2)); + JSC::ensureStillAliveHere(callFrame->argument(3)); + break; + } + case 5: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + JSC::ensureStillAliveHere(callFrame->argument(1)); + JSC::ensureStillAliveHere(callFrame->argument(2)); + JSC::ensureStillAliveHere(callFrame->argument(3)); + JSC::ensureStillAliveHere(callFrame->argument(4)); + break; + } + default: { + JSC::ensureStillAliveHere(callFrame->argument(0)); + JSC::ensureStillAliveHere(callFrame->argument(1)); + JSC::ensureStillAliveHere(callFrame->argument(2)); + JSC::ensureStillAliveHere(callFrame->argument(3)); + JSC::ensureStillAliveHere(callFrame->argument(4)); + JSC::ensureStillAliveHere(callFrame->argument(5)); + for (int i = 6; i < count; i++) { + JSC::ensureStillAliveHere(callFrame->argument(i)); + } + break; + } + } RETURN_IF_EXCEPTION(scope, {}); RELEASE_AND_RETURN(scope, JSValue::encode(callFrame->thisValue())); @@ -992,7 +1083,7 @@ NapiClass* NapiClass::create(VM& vm, Zig::GlobalObject* globalObject, const char size_t property_count, const napi_property_descriptor* properties) { - WTF::String name = WTF::String::fromUTF8(utf8name, length); + WTF::String name = WTF::String::fromUTF8(utf8name, length).isolatedCopy(); NativeExecutable* executable = vm.getHostFunction(NapiClass_ConstructorFunction, NapiClass_ConstructorFunction, name); Structure* structure = globalObject->NapiClassStructure(); @@ -1098,8 +1189,11 @@ extern "C" napi_status napi_define_class(napi_env env, { Zig::GlobalObject* globalObject = toJS(env); JSC::VM& vm = globalObject->vm(); - - NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, length, constructor, data, property_count, properties); + size_t len = length; + if (len == NAPI_AUTO_LENGTH) { + len = strlen(utf8name); + } + NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, len, constructor, data, property_count, properties); JSC::JSValue value = JSC::JSValue(napiClass); if (data != nullptr) { napiClass->dataPtr = data; diff --git a/src/javascript/jsc/webcore/encoding.zig b/src/javascript/jsc/webcore/encoding.zig index 2e7e41c27..245f6127d 100644 --- a/src/javascript/jsc/webcore/encoding.zig +++ b/src/javascript/jsc/webcore/encoding.zig @@ -692,6 +692,43 @@ pub const Encoder = struct { return writeU8(input, len, to, to_len, .ascii); } + export fn Bun__encoding__byteLengthLatin1AsHex(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .hex); + } + export fn Bun__encoding__byteLengthLatin1AsASCII(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .ascii); + } + export fn Bun__encoding__byteLengthLatin1AsURLSafeBase64(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .base64url); + } + export fn Bun__encoding__byteLengthLatin1AsUTF16(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .utf16le); + } + export fn Bun__encoding__byteLengthLatin1AsUTF8(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .utf8); + } + export fn Bun__encoding__byteLengthLatin1AsBase64(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .base64); + } + export fn Bun__encoding__byteLengthUTF16AsBase64(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .base64); + } + export fn Bun__encoding__byteLengthUTF16AsHex(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .hex); + } + export fn Bun__encoding__byteLengthUTF16AsURLSafeBase64(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .base64url); + } + export fn Bun__encoding__byteLengthUTF16AsUTF16(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .utf16le); + } + export fn Bun__encoding__byteLengthUTF16AsUTF8(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .utf8); + } + export fn Bun__encoding__byteLengthUTF16AsASCII(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .ascii); + } + export fn Bun__encoding__constructFromLatin1AsHex(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { var slice = constructFromU8(input, len, .hex); return JSC.JSValue.createBuffer(globalObject, slice, VirtualMachine.vm.allocator); @@ -919,15 +956,37 @@ pub const Encoder = struct { } } - pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + pub fn byteLengthU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) usize { if (len == 0) return 0; - // TODO: increase temporary buffer size for larger amounts of data - // defer { - // if (comptime encoding.isBinaryToText()) {} - // } - // if (comptime encoding.isBinaryToText()) {} + switch (comptime encoding) { + .utf8 => { + return strings.elementLengthLatin1IntoUTF8([]const u8, input[0..len]); + }, + + .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.buffer => { + return len; + }, + + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + return strings.elementLengthUTF8IntoUTF16([]const u8, input[0..len]) * 2; + }, + + JSC.Node.Encoding.hex => { + return len * 2; + }, + + JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => { + return bun.base64.encodeLen(input[0..len]); + }, + // else => return &[_]u8{}; + } + } + + pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + if (len == 0) + return 0; switch (comptime encoding) { .utf8 => { @@ -958,6 +1017,32 @@ pub const Encoder = struct { } } + /// Node returns imprecise byte length here + /// Should be fast enough for us to return precise length + pub fn byteLengthU16(input: [*]const u16, len: usize, comptime encoding: JSC.Node.Encoding) usize { + if (len == 0) + return 0; + + switch (comptime encoding) { + // these should be the same size + .ascii, .latin1, .utf8 => { + return strings.elementLengthUTF16IntoUTF8([]const u16, input[0..len]); + }, + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => { + return len * 2; + }, + + JSC.Node.Encoding.hex => { + return len; + }, + + JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => { + return bun.base64.encodeLen(input[0..len]); + }, + // else => return &[_]u8{}; + } + } + pub fn constructFromU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) []u8 { if (len == 0) return &[_]u8{}; @@ -1105,6 +1190,19 @@ pub const Encoder = struct { _ = Bun__encoding__writeLatin1AsASCII; _ = Bun__encoding__writeUTF16AsASCII; + _ = Bun__encoding__byteLengthLatin1AsHex; + _ = Bun__encoding__byteLengthLatin1AsURLSafeBase64; + _ = Bun__encoding__byteLengthLatin1AsUTF16; + _ = Bun__encoding__byteLengthLatin1AsUTF8; + _ = Bun__encoding__byteLengthLatin1AsBase64; + _ = Bun__encoding__byteLengthUTF16AsBase64; + _ = Bun__encoding__byteLengthUTF16AsHex; + _ = Bun__encoding__byteLengthUTF16AsURLSafeBase64; + _ = Bun__encoding__byteLengthUTF16AsUTF16; + _ = Bun__encoding__byteLengthUTF16AsUTF8; + _ = Bun__encoding__byteLengthLatin1AsASCII; + _ = Bun__encoding__byteLengthUTF16AsASCII; + _ = Bun__encoding__toStringUTF16; _ = Bun__encoding__toStringUTF8; _ = Bun__encoding__toStringASCII; diff --git a/src/string_immutable.zig b/src/string_immutable.zig index a928f45a6..1eb88675f 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -699,7 +699,7 @@ pub inline fn copyU8IntoU16(output_: []u16, input_: []const u8) void { // https://zig.godbolt.org/z/9rTn1orcY - const group = if (Environment.isAarch64) + const group = comptime if (Environment.isAarch64) // on ARM64, 128 seems to be the best choice judging by lines of ASM 128 else @@ -945,12 +945,7 @@ pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, ut const replacement = utf16Codepoint(Type, utf16_remaining); utf16_remaining = utf16_remaining[replacement.len..]; - const count: usize = switch (replacement.code_point) { - 0...0x7F => 1, - (0x7F + 1)...0x7FF => 2, - (0x7FF + 1)...0xFFFF => 3, - else => 4, - }; + const count: usize = replacement.utf8Width(); try list.ensureUnusedCapacity(i + count); list.items.len += i; @@ -1009,6 +1004,15 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type, pub const UTF16Replacement = struct { code_point: u32 = unicode_replacement, len: u3 = 0, + + pub inline fn utf8Width(replacement: UTF16Replacement) usize { + return switch (replacement.code_point) { + 0...0x7F => 1, + (0x7F + 1)...0x7FF => 2, + (0x7FF + 1)...0xFFFF => 3, + else => 4, + }; + } }; // This variation matches WebKit behavior. @@ -1138,6 +1142,36 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode }; } +pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize { + var latin1 = latin1_; + var count: usize = 0; + while (latin1.len > 0) { + var read: usize = 0; + + while (latin1.len > ascii_vector_size) { + const vec: AsciiVector = latin1[0..ascii_vector_size].*; + + if (@reduce(.Max, vec) > 127) { + break; + } + + latin1 = latin1[ascii_vector_size..]; + count += ascii_vector_size; + } + + while (read < latin1.len and latin1[read] < 0x80) : (read += 1) {} + + count += read; + latin1 = latin1[read..]; + if (latin1.len > 0) { + latin1 = latin1[1..]; + count += 2; + } + } + + return count; +} + const JSC = @import("javascript_core"); pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult { @@ -1161,6 +1195,28 @@ pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: t }; } +pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize { + // latin1 is always at most 1 UTF-16 code unit long + if (comptime std.meta.Child(u16) == Type) { + return latin1_.len; + } + + var count: usize = 0; + var latin1 = latin1_; + while (latin1.len > 0) { + const function = comptime if (std.meta.Child(Type) == u8) strings.firstNonASCIIWithType else strings.firstNonASCII16; + const to_write = function(Type, latin1) orelse @truncate(u32, latin1.len); + count += to_write; + latin1 = latin1[to_write..]; + if (latin1.len > 0) { + count += comptime if (std.meta.Child(Type) == u8) 2 else 1; + latin1 = latin1[1..]; + } + } + + return count; +} + test "copyLatin1IntoUTF8" { var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!"; var output = std.mem.zeroes([500]u8); @@ -1213,13 +1269,7 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto const replacement = utf16Codepoint(Type, utf16_remaining); - const width: usize = switch (replacement.code_point) { - 0...0x7F => 1, - (0x7F + 1)...0x7FF => 2, - (0x7FF + 1)...0xFFFF => 3, - else => 4, - }; - + const width: usize = replacement.utf8Width(); if (width > remaining.len) { ended_on_non_ascii = width > 1; break; @@ -1243,6 +1293,42 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto }; } +pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize { + var utf16_remaining = utf16; + var count: usize = 0; + + while (firstNonASCII16(Type, utf16_remaining)) |i| { + count += i; + + utf16_remaining = utf16_remaining[i..]; + + const replacement = utf16Codepoint(Type, utf16_remaining); + + count += replacement.utf8Width(); + utf16_remaining = utf16_remaining[replacement.len..]; + } + + return count + utf16_remaining.len; +} + +pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize { + var utf8_remaining = utf8; + var count: usize = 0; + + while (firstNonASCII(utf8_remaining)) |i| { + count += i; + + utf8_remaining = utf8_remaining[i..]; + + const replacement = utf16Codepoint(Type, utf8_remaining); + + count += replacement.len; + utf8_remaining = utf8_remaining[@minimum(replacement.utf8Width(), utf8_remaining.len)..]; + } + + return count + utf8_remaining.len; +} + // Check utf16 string equals utf8 string without allocating extra memory pub fn utf16EqlString(text: []const u16, str: string) bool { if (text.len > str.len) { @@ -1462,6 +1548,10 @@ pub inline fn u16Len(supplementary: anytype) u2 { } pub fn firstNonASCII(slice: []const u8) ?u32 { + return firstNonASCIIWithType([]const u8, slice); +} + +pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 { var remaining = slice; if (comptime Environment.isAarch64 or Environment.isX64) { |