diff options
-rw-r--r-- | packages/bun-types/buffer.d.ts | 24 | ||||
-rw-r--r-- | src/bun.js/modules/BufferModule.h | 130 | ||||
-rw-r--r-- | test/js/node/buffer.test.js | 24 |
3 files changed, 177 insertions, 1 deletions
diff --git a/packages/bun-types/buffer.d.ts b/packages/bun-types/buffer.d.ts
index eb7335871..fd7201677 100644
--- a/packages/bun-types/buffer.d.ts
+++ b/packages/bun-types/buffer.d.ts
@@ -2084,6 +2084,30 @@ declare module "buffer" {
       values(): IterableIterator<number>;
     }
     var Buffer: BufferConstructor;
+
+    /**
+     * This function returns `true` if `input` contains only valid UTF-8-encoded data,
+     * including the case in which `input` is empty.
+     *
+     * Throws if the `input` is a detached array buffer.
+     * @since Bun v0.6.13
+     * @param input The input to validate.
+     */
+    export function isUtf8(
+      input: TypedArray | ArrayBufferLike | DataView,
+    ): boolean;
+
+    /**
+     * This function returns `true` if `input` contains only valid ASCII-encoded data,
+     * including the case in which `input` is empty.
+     *
+     * Throws if the `input` is a detached array buffer.
+     * @since Bun v0.6.13
+     * @param input The input to validate.
+     */
+    export function isAscii(
+      input: TypedArray | ArrayBufferLike | DataView,
+    ): boolean;
   }
 }
 declare module "node:buffer" {
diff --git a/src/bun.js/modules/BufferModule.h b/src/bun.js/modules/BufferModule.h
index a96fb18c8..6e6e39e9c 100644
--- a/src/bun.js/modules/BufferModule.h
+++ b/src/bun.js/modules/BufferModule.h
@@ -2,11 +2,127 @@
 #include "../bindings/ZigGlobalObject.h"
 #include "JavaScriptCore/JSGlobalObject.h"
 #include "JavaScriptCore/ObjectConstructor.h"
+#include "simdutf.h"
 
 namespace Zig {
 using namespace WebCore;
 using namespace JSC;
 
+// TODO: Add DOMJIT fast path
+JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isUtf8,
+                         (JSC::JSGlobalObject * lexicalGlobalObject,
+                          JSC::CallFrame *callframe)) {
+  auto throwScope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());
+
+  auto buffer = callframe->argument(0);
+  auto *bufferView = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(buffer);
+  const char *ptr = nullptr;
+  size_t byteLength = 0;
+  if (bufferView) {
+    if (UNLIKELY(bufferView->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBufferView is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = bufferView->byteLength();
+
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(bufferView->vector());
+  } else if (auto *arrayBuffer =
+                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(buffer)) {
+    auto *impl = arrayBuffer->impl();
+
+    if (!impl) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    if (UNLIKELY(impl->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBuffer is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = impl->byteLength();
+
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(impl->data());
+  } else {
+    throwVMError(
+        lexicalGlobalObject, throwScope,
+        createTypeError(lexicalGlobalObject,
+                        "First argument must be an ArrayBufferView"_s));
+    return JSValue::encode({});
+  }
+
+  RELEASE_AND_RETURN(throwScope, JSValue::encode(jsBoolean(
+                                     simdutf::validate_utf8(ptr, byteLength))));
+}
+
+// TODO: Add DOMJIT fast path
+JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isAscii,
+                         (JSC::JSGlobalObject * lexicalGlobalObject,
+                          JSC::CallFrame *callframe)) {
+  auto throwScope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());
+
+  auto buffer = callframe->argument(0);
+  auto *bufferView = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(buffer);
+  const char *ptr = nullptr;
+  size_t byteLength = 0;
+  if (bufferView) {
+    if (UNLIKELY(bufferView->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBufferView is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = bufferView->byteLength();
+
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(bufferView->vector());
+  } else if (auto *arrayBuffer =
+                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(buffer)) {
+    auto *impl = arrayBuffer->impl();
+    // Null-check impl before dereferencing it for the detached check.
+    if (!impl) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    if (UNLIKELY(impl->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBuffer is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = impl->byteLength();
+
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(impl->data());
+  } else {
+    throwVMError(
+        lexicalGlobalObject, throwScope,
+        createTypeError(lexicalGlobalObject,
+                        "First argument must be an ArrayBufferView"_s));
+    return JSValue::encode({});
+  }
+
+  RELEASE_AND_RETURN(
+      throwScope,
+      JSValue::encode(jsBoolean(simdutf::validate_ascii(ptr, byteLength))));
+}
+
 JSC_DEFINE_HOST_FUNCTION(jsFunctionNotImplemented,
                          (JSGlobalObject * globalObject,
                           CallFrame *callFrame)) {
@@ -106,6 +222,20 @@ inline void generateBufferSourceCode(JSC::JSGlobalObject *lexicalGlobalObject,
   exportProperty(JSC::Identifier::fromString(vm, "resolveObjectURL"_s),
                  resolveObjectURL);
 
+  exportProperty(JSC::Identifier::fromString(vm, "isAscii"_s),
+                 JSC::JSFunction::create(vm, globalObject, 1, "isAscii"_s,
+                                         jsBufferConstructorFunction_isAscii,
+                                         ImplementationVisibility::Public,
+                                         NoIntrinsic,
+                                         jsBufferConstructorFunction_isAscii));
+
+  exportProperty(JSC::Identifier::fromString(vm, "isUtf8"_s),
+                 JSC::JSFunction::create(vm, globalObject, 1, "isUtf8"_s,
+                                         jsBufferConstructorFunction_isUtf8,
+                                         ImplementationVisibility::Public,
+                                         NoIntrinsic,
+                                         jsBufferConstructorFunction_isUtf8));
+
   exportNames.append(vm.propertyNames->defaultKeyword);
   exportValues.append(defaultObject);
 }
diff --git a/test/js/node/buffer.test.js b/test/js/node/buffer.test.js
index e0d8f5486..99562fedd 100644
--- a/test/js/node/buffer.test.js
+++ b/test/js/node/buffer.test.js
@@ -1,4 +1,4 @@
-import { Buffer, SlowBuffer } from "buffer";
+import { Buffer, SlowBuffer, isAscii, isUtf8 } from "buffer";
 import { describe, it, expect, beforeEach, afterEach } from "bun:test";
 import { gc } from "harness";
 
@@ -7,6 +7,28 @@
 const BufferModule = await import("buffer");
 beforeEach(() => gc());
 afterEach(() => gc());
+it("isAscii", () => {
+  expect(isAscii(new Buffer("abc"))).toBeTrue();
+  expect(isAscii(new Buffer(""))).toBeTrue();
+  expect(isAscii(new Buffer([32, 32, 128]))).toBeFalse();
+  expect(isAscii(new Buffer("What did the 🦊 say?"))).toBeFalse();
+
+  expect(isAscii(new Buffer("").buffer)).toBeTrue();
+  expect(isAscii(new Buffer([32, 32, 128]).buffer)).toBeFalse();
+});
+
+it("isUtf8", () => {
+  expect(isUtf8(new Buffer("abc"))).toBeTrue();
+  expect(isUtf8(new Buffer(""))).toBeTrue();
+  expect(isUtf8(new Buffer("What did the 🦊 say?"))).toBeTrue();
+  expect(isUtf8(new Buffer([129, 129, 129]))).toBeFalse();
+
+  expect(isUtf8(new Buffer("abc").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer("").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer("What did the 🦊 say?").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer([129, 129, 129]).buffer)).toBeFalse();
+});
+
 // https://github.com/oven-sh/bun/issues/2052
 it("Buffer global is settable", () => {
   var prevBuffer = globalThis.Buffer;