aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-05-10 19:09:28 -0700
committerGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-05-10 19:09:28 -0700
commit8def37c14e59b381d7165cd2bd79e8499e52ecf1 (patch)
treed67a766833c119f2e34234517351f5fd3f5ada99
parentb3760cd72349eb743c5dcfd679f3b64612d6b7ff (diff)
downloadbun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.tar.gz
bun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.tar.zst
bun-8def37c14e59b381d7165cd2bd79e8499e52ecf1.zip
[bun.js] Implement `Buffer.byteLength`
-rw-r--r--src/javascript/jsc/bindings/JSBuffer.cpp103
-rw-r--r--src/javascript/jsc/bindings/headers-cpp.h2
-rw-r--r--src/javascript/jsc/bindings/headers-handwritten.h13
-rw-r--r--src/javascript/jsc/bindings/headers.h2
-rw-r--r--src/javascript/jsc/bindings/napi.cpp142
-rw-r--r--src/javascript/jsc/webcore/encoding.zig110
-rw-r--r--src/string_immutable.zig118
7 files changed, 443 insertions, 47 deletions
diff --git a/src/javascript/jsc/bindings/JSBuffer.cpp b/src/javascript/jsc/bindings/JSBuffer.cpp
index e67e39f19..301663613 100644
--- a/src/javascript/jsc/bindings/JSBuffer.cpp
+++ b/src/javascript/jsc/bindings/JSBuffer.cpp
@@ -360,7 +360,108 @@ static inline JSC::EncodedJSValue jsBufferConstructorFunction_allocUnsafeSlowBod
// Host-function body for `Buffer.byteLength(value[, encoding])`.
//  - Throws a TypeError when called with no arguments.
//  - If the first argument is a JSArrayBufferView, returns its byteLength.
//  - Otherwise converts the argument to a string, reads the optional encoding
//    (default: utf8) and asks the matching Bun__encoding__byteLength* helper
//    for the byte count, which is returned as a JS number.
static inline JSC::EncodedJSValue jsBufferConstructorFunction_byteLengthBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
{
auto& vm = JSC::getVM(lexicalGlobalObject);
- return JSValue::encode(jsUndefined());
+
// NOTE(review): `offset` and `length` are not referenced again in this body — confirm they can be removed.
+ uint32_t offset = 0;
+ uint32_t length = castedThis->length();
+ WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8;
+
+ auto scope = DECLARE_THROW_SCOPE(vm);
+
+ if (UNLIKELY(callFrame->argumentCount() == 0)) {
+ throwTypeError(lexicalGlobalObject, scope, "Not enough arguments"_s);
+ return JSC::JSValue::encode(jsUndefined());
+ }
+
+ EnsureStillAliveScope arg0 = callFrame->argument(0);
+ auto input = arg0.value();
// Typed arrays / DataViews: the answer is the view's own byte length; the encoding argument is never read.
+ if (JSC::JSArrayBufferView* view = JSC::jsDynamicCast<JSC::JSArrayBufferView*>(input)) {
+ RELEASE_AND_RETURN(scope, JSValue::encode(JSC::jsNumber(view->byteLength())));
+ }
+ auto* str = arg0.value().toStringOrNull(lexicalGlobalObject);
+
// NOTE(review): presumably a null result means toStringOrNull already raised an exception; confirm that throwing a second TypeError here is intended.
+ if (!str) {
+ throwTypeError(lexicalGlobalObject, scope, "byteLength() expects a string"_s);
+ return JSC::JSValue::encode(jsUndefined());
+ }
+
+ EnsureStillAliveScope arg1 = callFrame->argument(1);
+
// An empty string returns 0 before the encoding argument is looked at, so an invalid encoding is not reported in this case.
+ if (str->length() == 0)
+ return JSC::JSValue::encode(JSC::jsNumber(0));
+
// Only a string second argument is parsed as an encoding; any other value leaves the utf8 default in place.
+ if (callFrame->argumentCount() > 1) {
+ if (arg1.value().isString()) {
+ std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1.value());
+ if (!encoded) {
+ throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s);
+ return JSC::JSValue::encode(jsUndefined());
+ }
+
+ encoding = encoded.value();
+ }
+ }
+
+ auto view = str->tryGetValue(lexicalGlobalObject);
// NOTE(review): the Bun__encoding__byteLength* helpers are declared as returning size_t in headers-handwritten.h, while the result is stored as int64_t here.
+ int64_t written = 0;
+
// Each case picks the 8-bit (Latin1) or 16-bit (UTF16) helper depending on how the string is stored.
+ switch (encoding) {
+ case WebCore::BufferEncodingType::utf8: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsUTF8(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsUTF8(view.characters16(), view.length());
+ }
+ break;
+ }
+
// latin1 and ascii share the ...AsASCII helpers.
+ case WebCore::BufferEncodingType::latin1:
+ case WebCore::BufferEncodingType::ascii: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsASCII(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsASCII(view.characters16(), view.length());
+ }
+ break;
+ }
+ case WebCore::BufferEncodingType::ucs2:
+ case WebCore::BufferEncodingType::utf16le: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsUTF16(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsUTF16(view.characters16(), view.length());
+ }
+ break;
+ }
+
+ case WebCore::BufferEncodingType::base64: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsBase64(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsBase64(view.characters16(), view.length());
+ }
+ break;
+ }
+
+ case WebCore::BufferEncodingType::base64url: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsURLSafeBase64(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsURLSafeBase64(view.characters16(), view.length());
+ }
+ break;
+ }
+
+ case WebCore::BufferEncodingType::hex: {
+ if (view.is8Bit()) {
+ written = Bun__encoding__byteLengthLatin1AsHex(view.characters8(), view.length());
+ } else {
+ written = Bun__encoding__byteLengthUTF16AsHex(view.characters16(), view.length());
+ }
+ break;
+ }
+ }
+
+ RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(written)));
}
static inline JSC::EncodedJSValue jsBufferConstructorFunction_compareBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis)
diff --git a/src/javascript/jsc/bindings/headers-cpp.h b/src/javascript/jsc/bindings/headers-cpp.h
index 6b4c06ebe..08c528863 100644
--- a/src/javascript/jsc/bindings/headers-cpp.h
+++ b/src/javascript/jsc/bindings/headers-cpp.h
@@ -1,4 +1,4 @@
-//-- AUTOGENERATED FILE -- 1651982852
+//-- AUTOGENERATED FILE -- 1652089399
// clang-format off
#pragma once
diff --git a/src/javascript/jsc/bindings/headers-handwritten.h b/src/javascript/jsc/bindings/headers-handwritten.h
index e285d5ffc..710e9290d 100644
--- a/src/javascript/jsc/bindings/headers-handwritten.h
+++ b/src/javascript/jsc/bindings/headers-handwritten.h
@@ -218,6 +218,19 @@ extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len,
extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len);
extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len);
// Byte-length helpers exported from encoding.zig. Each takes a pointer to
// 8-bit (`unsigned char`) or 16-bit (`UChar`) string data plus `len` and
// returns the computed size as size_t; nothing is written.
// Callers in JSBuffer.cpp pass `view.length()` as `len`.
+extern "C" size_t Bun__encoding__byteLengthLatin1AsHex(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsHex(const UChar* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthLatin1AsURLSafeBase64(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsURLSafeBase64(const UChar* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthLatin1AsBase64(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsBase64(const UChar* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF16(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF16(const UChar* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthLatin1AsUTF8(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsUTF8(const UChar* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthLatin1AsASCII(const unsigned char* ptr, size_t len);
+extern "C" size_t Bun__encoding__byteLengthUTF16AsASCII(const UChar* ptr, size_t len);
+
extern "C" int64_t Bun__encoding__constructFromLatin1AsHex(void*, const unsigned char* ptr, size_t len);
extern "C" int64_t Bun__encoding__constructFromUTF16AsHex(void*, const UChar* ptr, size_t len);
extern "C" int64_t Bun__encoding__constructFromLatin1AsURLSafeBase64(void*, const unsigned char* ptr, size_t len);
diff --git a/src/javascript/jsc/bindings/headers.h b/src/javascript/jsc/bindings/headers.h
index 07a86385f..32de25cf5 100644
--- a/src/javascript/jsc/bindings/headers.h
+++ b/src/javascript/jsc/bindings/headers.h
@@ -1,5 +1,5 @@
// clang-format: off
-//-- AUTOGENERATED FILE -- 1651982852
+//-- AUTOGENERATED FILE -- 1652089399
#pragma once
#include <stddef.h>
diff --git a/src/javascript/jsc/bindings/napi.cpp b/src/javascript/jsc/bindings/napi.cpp
index 9a8a03405..3335cde9e 100644
--- a/src/javascript/jsc/bindings/napi.cpp
+++ b/src/javascript/jsc/bindings/napi.cpp
@@ -55,13 +55,12 @@ namespace Napi {
JSC::SourceCode generateSourceCode(WTF::String keyString, JSC::VM& vm, JSC::JSObject* object, JSC::JSGlobalObject* globalObject)
{
-
+ JSC::gcProtect(object);
JSC::JSArray* exportKeys = ownPropertyKeys(globalObject, object, PropertyNameMode::StringsAndSymbols, DontEnumPropertiesMode::Include, std::nullopt);
- auto symbol = vm.symbolRegistry().symbolForKey("__BunTemporaryGlobal"_s);
- JSC::Identifier ident = JSC::Identifier::fromUid(symbol);
+ JSC::Identifier ident = JSC::Identifier::fromString(vm, "__BunTemporaryGlobal"_s);
WTF::StringBuilder sourceCodeBuilder = WTF::StringBuilder();
// TODO: handle symbol collision
- sourceCodeBuilder.append("var $$TempSymbol = Symbol.for('__BunTemporaryGlobal'), $$NativeModule = globalThis[$$TempSymbol]; globalThis[$$TempSymbol] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s);
+ sourceCodeBuilder.append("\nvar $$NativeModule = globalThis['__BunTemporaryGlobal']; console.log($$NativeModule); globalThis['__BunTemporaryGlobal'] = null;\n if (!$$NativeModule) { throw new Error('Assertion failure: Native module not found'); }\n\n"_s);
for (unsigned i = 0; i < exportKeys->length(); i++) {
auto key = exportKeys->getIndexQuickly(i);
@@ -207,17 +206,32 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
}
WTF::String nameStr;
if (property.utf8name != nullptr) {
- nameStr = WTF::String::fromUTF8(property.utf8name);
+ nameStr = WTF::String::fromUTF8(property.utf8name).isolatedCopy();
} else if (property.name) {
- nameStr = toJS(property.name).toWTFString(globalObject);
+ nameStr = toJS(property.name).toWTFString(globalObject).isolatedCopy();
}
auto propertyName = JSC::PropertyName(JSC::Identifier::fromString(vm, nameStr));
if (property.method) {
- auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, reinterpret_cast<Zig::FFIFunction>(property.method));
- function->dataPtr = dataPtr;
- JSC::JSValue value = JSC::JSValue(function);
+ JSC::JSValue value;
+ auto method = reinterpret_cast<Zig::FFIFunction>(property.method);
+ if (!dataPtr) {
+ JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create(
+ globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
+ JSC::MarkedArgumentBuffer values;
+ values.append(callFrame->thisValue());
+ for (int i = 0; i < callFrame->argumentCount(); i++) {
+ values.append(callFrame->argument(i));
+ }
+ return method(globalObject, callFrame);
+ });
+ value = JSC::JSValue(func);
+ } else {
+ auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, nameStr, method);
+ function->dataPtr = dataPtr;
+ value = JSC::JSValue(function);
+ }
to->putDirect(vm, propertyName, value, getPropertyAttributes(property) | JSC::PropertyAttribute::Function);
return;
@@ -233,6 +247,8 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
if (getterProperty) {
JSC::JSNativeStdFunction* getterFunction = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 0, String(), [getterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
+ JSC::MarkedArgumentBufferWithSize values;
+ values.append(callFrame->thisValue());
return getterProperty(globalObject, callFrame);
});
getter = getterFunction;
@@ -247,8 +263,10 @@ static void defineNapiProperty(Zig::GlobalObject* globalObject, JSC::JSObject* t
if (setterProperty) {
JSC::JSNativeStdFunction* setterFunction = JSC::JSNativeStdFunction::create(
globalObject->vm(), globalObject, 1, String(), [setterProperty](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
- setterProperty(globalObject, callFrame);
- return JSC::JSValue::encode(JSC::jsBoolean(true));
+ JSC::MarkedArgumentBufferWithSize values;
+ values.append(callFrame->thisValue());
+ values.append(callFrame->uncheckedArgument(0));
+ return setterProperty(globalObject, callFrame);
});
setter = setterFunction;
} else {
@@ -500,9 +518,9 @@ extern "C" napi_status napi_wrap(napi_env env,
extern "C" napi_status napi_unwrap(napi_env env, napi_value js_object,
void** result)
{
- // if (!toJS(js_object).isObject()) {
- // return NAPI_OBJECT_EXPECTED;
- // }
+ if (!toJS(js_object).isObject()) {
+ return NAPI_OBJECT_EXPECTED;
+ }
auto* globalObject = toJS(env);
auto& vm = globalObject->vm();
auto* object = JSC::jsDynamicCast<NapiPrototype*>(toJS(js_object));
@@ -521,12 +539,27 @@ extern "C" napi_status napi_create_function(napi_env env, const char* utf8name,
{
Zig::GlobalObject* globalObject = toJS(env);
JSC::VM& vm = globalObject->vm();
- auto name = WTF::String::fromUTF8(utf8name, length);
+ auto name = WTF::String::fromUTF8(utf8name, length == NAPI_AUTO_LENGTH ? strlen(utf8name) : length).isolatedCopy();
+ auto method = reinterpret_cast<Zig::FFIFunction>(cb);
+ if (data) {
+ auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, method);
+ function->dataPtr = data;
+ *result = toNapi(JSC::JSValue(function));
+ } else {
+ JSC::JSNativeStdFunction* func = JSC::JSNativeStdFunction::create(
+ globalObject->vm(), globalObject, 1, String(), [method](JSC::JSGlobalObject* globalObject, JSC::CallFrame* callFrame) -> JSC::EncodedJSValue {
+ JSC::MarkedArgumentBuffer values;
+ values.append(callFrame->thisValue());
+ for (int i = 0; i < callFrame->argumentCount(); i++) {
+ values.append(callFrame->argument(i));
+ }
+ return method(globalObject, callFrame);
+ });
+ *result = toNapi(JSC::JSValue(func));
+ }
+
// std::cout << "napi_create_function: " << utf8name << std::endl;
- auto function = Zig::JSFFIFunction::create(vm, globalObject, 1, name, reinterpret_cast<Zig::FFIFunction>(cb));
- function->dataPtr = data;
- JSC::JSValue functionValue = JSC::JSValue(function);
- *reinterpret_cast<JSC::EncodedJSValue*>(result) = JSC::JSValue::encode(functionValue);
+
return napi_ok;
}
@@ -559,8 +592,9 @@ extern "C" napi_status napi_get_cb_info(
}
}
+ JSC::JSValue thisValue = callFrame->thisValue();
+
if (this_arg != nullptr) {
- JSC::JSValue thisValue = callFrame->thisValue();
*this_arg = toNapi(thisValue);
}
@@ -568,8 +602,14 @@ extern "C" napi_status napi_get_cb_info(
JSC::JSValue callee = JSC::JSValue(callFrame->jsCallee());
if (Zig::JSFFIFunction* ffiFunction = JSC::jsDynamicCast<Zig::JSFFIFunction*>(callee)) {
*data = reinterpret_cast<void*>(ffiFunction->dataPtr);
- } else if (NapiPrototype* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) {
+ } else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(callee)) {
*data = proto->napiRef ? proto->napiRef->data : nullptr;
+ } else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(callee)) {
+ *data = proto->dataPtr;
+ } else if (auto* proto = JSC::jsDynamicCast<NapiPrototype*>(thisValue)) {
+ *data = proto->napiRef ? proto->napiRef->data : nullptr;
+ } else if (auto* proto = JSC::jsDynamicCast<NapiClass*>(thisValue)) {
+ *data = proto->dataPtr;
} else {
*data = nullptr;
}
@@ -595,6 +635,8 @@ napi_define_properties(napi_env env, napi_value object, size_t property_count,
void* inheritedDataPtr = nullptr;
if (NapiPrototype* proto = jsDynamicCast<NapiPrototype*>(objectValue)) {
inheritedDataPtr = proto->napiRef ? proto->napiRef->data : nullptr;
+ } else if (NapiClass* proto = jsDynamicCast<NapiClass*>(objectValue)) {
+ inheritedDataPtr = proto->dataPtr;
}
for (size_t i = 0; i < property_count; i++) {
@@ -980,6 +1022,55 @@ static JSC_DEFINE_HOST_FUNCTION(NapiClass_ConstructorFunction,
callFrame->setThisValue(prototype->subclass(newTarget));
napi->constructor()(globalObject, callFrame);
+ size_t count = callFrame->argumentCount();
+
+ switch (count) {
+ case 0: {
+ break;
+ }
+ case 1: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ break;
+ }
+ case 2: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ JSC::ensureStillAliveHere(callFrame->argument(1));
+ break;
+ }
+ case 3: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ JSC::ensureStillAliveHere(callFrame->argument(1));
+ JSC::ensureStillAliveHere(callFrame->argument(2));
+ break;
+ }
+ case 4: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ JSC::ensureStillAliveHere(callFrame->argument(1));
+ JSC::ensureStillAliveHere(callFrame->argument(2));
+ JSC::ensureStillAliveHere(callFrame->argument(3));
+ break;
+ }
+ case 5: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ JSC::ensureStillAliveHere(callFrame->argument(1));
+ JSC::ensureStillAliveHere(callFrame->argument(2));
+ JSC::ensureStillAliveHere(callFrame->argument(3));
+ JSC::ensureStillAliveHere(callFrame->argument(4));
+ break;
+ }
+ default: {
+ JSC::ensureStillAliveHere(callFrame->argument(0));
+ JSC::ensureStillAliveHere(callFrame->argument(1));
+ JSC::ensureStillAliveHere(callFrame->argument(2));
+ JSC::ensureStillAliveHere(callFrame->argument(3));
+ JSC::ensureStillAliveHere(callFrame->argument(4));
+ JSC::ensureStillAliveHere(callFrame->argument(5));
+ for (int i = 6; i < count; i++) {
+ JSC::ensureStillAliveHere(callFrame->argument(i));
+ }
+ break;
+ }
+ }
RETURN_IF_EXCEPTION(scope, {});
RELEASE_AND_RETURN(scope, JSValue::encode(callFrame->thisValue()));
@@ -992,7 +1083,7 @@ NapiClass* NapiClass::create(VM& vm, Zig::GlobalObject* globalObject, const char
size_t property_count,
const napi_property_descriptor* properties)
{
- WTF::String name = WTF::String::fromUTF8(utf8name, length);
+ WTF::String name = WTF::String::fromUTF8(utf8name, length).isolatedCopy();
NativeExecutable* executable = vm.getHostFunction(NapiClass_ConstructorFunction, NapiClass_ConstructorFunction, name);
Structure* structure = globalObject->NapiClassStructure();
@@ -1098,8 +1189,11 @@ extern "C" napi_status napi_define_class(napi_env env,
{
Zig::GlobalObject* globalObject = toJS(env);
JSC::VM& vm = globalObject->vm();
-
- NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, length, constructor, data, property_count, properties);
+ size_t len = length;
+ if (len == NAPI_AUTO_LENGTH) {
+ len = strlen(utf8name);
+ }
+ NapiClass* napiClass = NapiClass::create(vm, globalObject, utf8name, len, constructor, data, property_count, properties);
JSC::JSValue value = JSC::JSValue(napiClass);
if (data != nullptr) {
napiClass->dataPtr = data;
diff --git a/src/javascript/jsc/webcore/encoding.zig b/src/javascript/jsc/webcore/encoding.zig
index 2e7e41c27..245f6127d 100644
--- a/src/javascript/jsc/webcore/encoding.zig
+++ b/src/javascript/jsc/webcore/encoding.zig
@@ -692,6 +692,43 @@ pub const Encoder = struct {
return writeU8(input, len, to, to_len, .ascii);
}
// Exported entry points for the byte-length queries declared in
// headers-handwritten.h. Each one only forwards to `byteLengthU8`
// (8-bit input) or `byteLengthU16` (16-bit input) with the encoding
// fixed at comptime; `len` is the number of elements behind `input`.
+ export fn Bun__encoding__byteLengthLatin1AsHex(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .hex);
+ }
+ export fn Bun__encoding__byteLengthLatin1AsASCII(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .ascii);
+ }
+ export fn Bun__encoding__byteLengthLatin1AsURLSafeBase64(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .base64url);
+ }
+ export fn Bun__encoding__byteLengthLatin1AsUTF16(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .utf16le);
+ }
+ export fn Bun__encoding__byteLengthLatin1AsUTF8(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .utf8);
+ }
+ export fn Bun__encoding__byteLengthLatin1AsBase64(input: [*]const u8, len: usize) usize {
+ return byteLengthU8(input, len, .base64);
+ }
+ export fn Bun__encoding__byteLengthUTF16AsBase64(input: [*]const u16, len: usize) usize {
+ return byteLengthU16(input, len, .base64);
+ }
+ export fn Bun__encoding__byteLengthUTF16AsHex(input: [*]const u16, len: usize) usize {
+ return byteLengthU16(input, len, .hex);
+ }
+ export fn Bun__encoding__byteLengthUTF16AsURLSafeBase64(input: [*]const u16, len: usize) usize {
+ return byteLengthU16(input, len, .base64url);
+ }
+ export fn Bun__encoding__byteLengthUTF16AsUTF16(input: [*]const u16, len: usize) usize {
+ return byteLengthU16(input, len, .utf16le);
+ }
+ export fn Bun__encoding__byteLengthUTF16AsUTF8(input: [*]const u16, len: usize) usize {
+ return byteLengthU16(input, len, .utf8);
+ }
+    /// Byte length of a UTF-16 string of `len` code units when written as
+    /// ASCII/Latin-1.
+    ///
+    /// The C declaration passes `const UChar*`, so the pointer is typed as
+    /// `[*]const u16` here to match the other UTF16 entry points. The result
+    /// does not depend on the contents: each code unit becomes exactly one
+    /// output byte, so the answer is `len` (and 0 for an empty string).
+    export fn Bun__encoding__byteLengthUTF16AsASCII(input: [*]const u16, len: usize) usize {
+        _ = input;
+        return len;
+    }
+
export fn Bun__encoding__constructFromLatin1AsHex(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue {
var slice = constructFromU8(input, len, .hex);
return JSC.JSValue.createBuffer(globalObject, slice, VirtualMachine.vm.allocator);
@@ -919,15 +956,37 @@ pub const Encoder = struct {
}
}
- pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
+    /// Returns how many bytes a Latin-1 string of `len` characters occupies
+    /// once it is interpreted with `encoding` (the `Buffer.byteLength` query).
+    ///
+    /// `input` points at `len` Latin-1 bytes; nothing is written or allocated.
+    /// An empty string is 0 bytes in every encoding.
+    pub fn byteLengthU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) usize {
if (len == 0)
return 0;
- // TODO: increase temporary buffer size for larger amounts of data
- // defer {
- // if (comptime encoding.isBinaryToText()) {}
- // }
- // if (comptime encoding.isBinaryToText()) {}
+        switch (comptime encoding) {
+            .utf8 => {
+                return strings.elementLengthLatin1IntoUTF8([]const u8, input[0..len]);
+            },
+
+            .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.buffer => {
+                return len;
+            },
+
+            JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
+                // Every Latin-1 character maps to exactly one UTF-16 code
+                // unit, so the input does not need to be scanned (and must
+                // not be scanned as if it were UTF-8).
+                return len * 2;
+            },
+
+            JSC.Node.Encoding.hex => {
+                // A hex string describes one byte per two digits; a trailing
+                // odd digit does not produce a byte.
+                return len / 2;
+            },
+
+            JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => {
+                // NOTE(review): Buffer.byteLength reports the *decoded* size
+                // for base64 input, while `encodeLen` is named like an
+                // encoded-size helper — confirm and switch to a decode-length
+                // helper if one is available.
+                return bun.base64.encodeLen(input[0..len]);
+            },
+            // else => return &[_]u8{};
+        }
+    }
+
+ pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 {
+ if (len == 0)
+ return 0;
switch (comptime encoding) {
.utf8 => {
@@ -958,6 +1017,32 @@ pub const Encoder = struct {
}
}
+    /// Returns how many bytes a UTF-16 string of `len` code units occupies
+    /// once it is interpreted with `encoding` (the `Buffer.byteLength` query).
+    ///
+    /// Node returns an imprecise byte length for UTF-8 here; counting is
+    /// expected to be fast enough to return the precise length instead.
+    /// `input` points at `len` UTF-16 code units; nothing is written.
+    pub fn byteLengthU16(input: [*]const u16, len: usize, comptime encoding: JSC.Node.Encoding) usize {
+        if (len == 0)
+            return 0;
+
+        switch (comptime encoding) {
+            .utf8 => {
+                return strings.elementLengthUTF16IntoUTF8([]const u16, input[0..len]);
+            },
+
+            // ASCII and Latin-1 output keep one byte per code unit, which is
+            // not the same as the UTF-8 length for non-ASCII text.
+            .ascii, .latin1 => {
+                return len;
+            },
+
+            JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => {
+                return len * 2;
+            },
+
+            JSC.Node.Encoding.hex => {
+                // Two hex digits describe one byte, regardless of how the
+                // string itself is stored.
+                return len / 2;
+            },
+
+            JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => {
+                // NOTE(review): Buffer.byteLength reports the *decoded* size
+                // for base64 input, while `encodeLen` is named like an
+                // encoded-size helper — confirm.
+                return bun.base64.encodeLen(input[0..len]);
+            },
+            // else => return &[_]u8{};
+        }
+    }
+
pub fn constructFromU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) []u8 {
if (len == 0)
return &[_]u8{};
@@ -1105,6 +1190,19 @@ pub const Encoder = struct {
_ = Bun__encoding__writeLatin1AsASCII;
_ = Bun__encoding__writeUTF16AsASCII;
+ _ = Bun__encoding__byteLengthLatin1AsHex;
+ _ = Bun__encoding__byteLengthLatin1AsURLSafeBase64;
+ _ = Bun__encoding__byteLengthLatin1AsUTF16;
+ _ = Bun__encoding__byteLengthLatin1AsUTF8;
+ _ = Bun__encoding__byteLengthLatin1AsBase64;
+ _ = Bun__encoding__byteLengthUTF16AsBase64;
+ _ = Bun__encoding__byteLengthUTF16AsHex;
+ _ = Bun__encoding__byteLengthUTF16AsURLSafeBase64;
+ _ = Bun__encoding__byteLengthUTF16AsUTF16;
+ _ = Bun__encoding__byteLengthUTF16AsUTF8;
+ _ = Bun__encoding__byteLengthLatin1AsASCII;
+ _ = Bun__encoding__byteLengthUTF16AsASCII;
+
_ = Bun__encoding__toStringUTF16;
_ = Bun__encoding__toStringUTF8;
_ = Bun__encoding__toStringASCII;
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index a928f45a6..1eb88675f 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -699,7 +699,7 @@ pub inline fn copyU8IntoU16(output_: []u16, input_: []const u8) void {
// https://zig.godbolt.org/z/9rTn1orcY
- const group = if (Environment.isAarch64)
+ const group = comptime if (Environment.isAarch64)
// on ARM64, 128 seems to be the best choice judging by lines of ASM
128
else
@@ -945,12 +945,7 @@ pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, ut
const replacement = utf16Codepoint(Type, utf16_remaining);
utf16_remaining = utf16_remaining[replacement.len..];
- const count: usize = switch (replacement.code_point) {
- 0...0x7F => 1,
- (0x7F + 1)...0x7FF => 2,
- (0x7FF + 1)...0xFFFF => 3,
- else => 4,
- };
+ const count: usize = replacement.utf8Width();
try list.ensureUnusedCapacity(i + count);
list.items.len += i;
@@ -1009,6 +1004,15 @@ pub fn allocateLatin1IntoUTF8(allocator: std.mem.Allocator, comptime Type: type,
pub const UTF16Replacement = struct {
code_point: u32 = unicode_replacement,
len: u3 = 0,
+
+    /// Number of bytes `code_point` needs when encoded as UTF-8 (1 to 4).
+    pub inline fn utf8Width(replacement: UTF16Replacement) usize {
+        const cp = replacement.code_point;
+        if (cp <= 0x7F) return 1;
+        if (cp <= 0x7FF) return 2;
+        if (cp <= 0xFFFF) return 3;
+        return 4;
+    }
};
// This variation matches WebKit behavior.
@@ -1138,6 +1142,36 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
};
}
+/// Counts the bytes needed to re-encode Latin-1 text as UTF-8 without
+/// producing any output: bytes below 0x80 cost one byte each, every other
+/// byte costs two.
+pub fn elementLengthLatin1IntoUTF8(comptime Type: type, latin1_: Type) usize {
+    var remaining = latin1_;
+    var total: usize = 0;
+
+    while (remaining.len > 0) {
+        // Fast path: step over whole vectors that contain only ASCII.
+        while (remaining.len > ascii_vector_size) {
+            const chunk: AsciiVector = remaining[0..ascii_vector_size].*;
+            if (@reduce(.Max, chunk) > 127) break;
+
+            total += ascii_vector_size;
+            remaining = remaining[ascii_vector_size..];
+        }
+
+        // Scalar scan over whatever ASCII run is left.
+        var ascii_run: usize = 0;
+        while (ascii_run < remaining.len and remaining[ascii_run] < 0x80) {
+            ascii_run += 1;
+        }
+        total += ascii_run;
+        remaining = remaining[ascii_run..];
+
+        // Anything still here starts with a non-ASCII byte, which is
+        // counted as two UTF-8 bytes.
+        if (remaining.len == 0) break;
+        remaining = remaining[1..];
+        total += 2;
+    }
+
+    return total;
+}
+
const JSC = @import("javascript_core");
pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: type, latin1_: Type) EncodeIntoResult {
@@ -1161,6 +1195,28 @@ pub fn copyLatin1IntoUTF16(comptime Buffer: type, buf_: Buffer, comptime Type: t
};
}
+/// Number of UTF-16 code units needed to hold `latin1_`.
+///
+/// Every Latin-1 character (U+0000 to U+00FF) is exactly one UTF-16 code
+/// unit, so the answer is the element count of the input whether `Type` is a
+/// slice of `u8` or of already-widened `u16`; the contents never need to be
+/// scanned. (The previous body compared `std.meta.Child(u16)` against `Type`,
+/// which cannot compile once instantiated, and counted two units per
+/// non-ASCII byte.)
+pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
+    return latin1_.len;
+}
+
test "copyLatin1IntoUTF8" {
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
var output = std.mem.zeroes([500]u8);
@@ -1213,13 +1269,7 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
const replacement = utf16Codepoint(Type, utf16_remaining);
- const width: usize = switch (replacement.code_point) {
- 0...0x7F => 1,
- (0x7F + 1)...0x7FF => 2,
- (0x7FF + 1)...0xFFFF => 3,
- else => 4,
- };
-
+ const width: usize = replacement.utf8Width();
if (width > remaining.len) {
ended_on_non_ascii = width > 1;
break;
@@ -1243,6 +1293,42 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
};
}
+/// Counts the UTF-8 bytes needed for `utf16` without writing any output.
+/// ASCII runs are counted one byte per code unit; at each non-ASCII position
+/// the code point is decoded with `utf16Codepoint` and its `utf8Width()` is
+/// added.
+pub fn elementLengthUTF16IntoUTF8(comptime Type: type, utf16: Type) usize {
+    var rest = utf16;
+    var total: usize = 0;
+
+    while (true) {
+        // No non-ASCII unit left: the tail is counted after the loop.
+        const ascii_prefix = firstNonASCII16(Type, rest) orelse break;
+        total += ascii_prefix;
+        rest = rest[ascii_prefix..];
+
+        const decoded = utf16Codepoint(Type, rest);
+        total += decoded.utf8Width();
+        rest = rest[decoded.len..];
+    }
+
+    return total + rest.len;
+}
+
/// Counts the UTF-16 code units needed for `utf8` without writing any output.
/// Bytes before each non-ASCII position are counted one unit each; at a
/// non-ASCII position the `len` reported by `utf16Codepoint` is added and the
/// input is advanced by that code point's `utf8Width()`.
// NOTE(review): `utf16Codepoint` is applied to what the parameter name
// describes as UTF-8 bytes, and `firstNonASCII` is called regardless of
// `Type` — confirm both are intended.
+pub fn elementLengthUTF8IntoUTF16(comptime Type: type, utf8: Type) usize {
+ var utf8_remaining = utf8;
+ var count: usize = 0;
+
+ while (firstNonASCII(utf8_remaining)) |i| {
+ count += i;
+
+ utf8_remaining = utf8_remaining[i..];
+
+ const replacement = utf16Codepoint(Type, utf8_remaining);
+
+ count += replacement.len;
// Clamp the advance so a sequence truncated at the end of the input cannot slice past the end.
+ utf8_remaining = utf8_remaining[@minimum(replacement.utf8Width(), utf8_remaining.len)..];
+ }
+
// Whatever is left contains no non-ASCII byte: one unit per byte.
+ return count + utf8_remaining.len;
+}
+
// Check utf16 string equals utf8 string without allocating extra memory
pub fn utf16EqlString(text: []const u16, str: string) bool {
if (text.len > str.len) {
@@ -1462,6 +1548,10 @@ pub inline fn u16Len(supplementary: anytype) u2 {
}
/// Convenience wrapper that calls `firstNonASCIIWithType` with `[]const u8`.
/// Presumably returns the index of the first non-ASCII byte, or null when the
/// slice is all ASCII — callers in this file use the result as an offset.
pub fn firstNonASCII(slice: []const u8) ?u32 {
+ return firstNonASCIIWithType([]const u8, slice);
+}
+
+pub fn firstNonASCIIWithType(comptime Type: type, slice: Type) ?u32 {
var remaining = slice;
if (comptime Environment.isAarch64 or Environment.isX64) {