aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2023-07-02 18:06:21 -0700
committerGravatar GitHub <noreply@github.com> 2023-07-02 18:06:21 -0700
commitc3f8593f8cb1571b41b8233e5ded98e3d3f99fb0 (patch)
treec6d45c32b1d63b8bf6f347146b67255b0bccdb6b
parent4cbda049e97b2e1049cd2c4c93a617a54931b220 (diff)
downloadbun-c3f8593f8cb1571b41b8233e5ded98e3d3f99fb0.tar.gz
bun-c3f8593f8cb1571b41b8233e5ded98e3d3f99fb0.tar.zst
bun-c3f8593f8cb1571b41b8233e5ded98e3d3f99fb0.zip
[node:buffer] Implement `isUtf8` and `isAscii` (#3498)
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
-rw-r--r--packages/bun-types/buffer.d.ts24
-rw-r--r--src/bun.js/modules/BufferModule.h130
-rw-r--r--test/js/node/buffer.test.js24
3 files changed, 177 insertions, 1 deletions
diff --git a/packages/bun-types/buffer.d.ts b/packages/bun-types/buffer.d.ts
index eb7335871..fd7201677 100644
--- a/packages/bun-types/buffer.d.ts
+++ b/packages/bun-types/buffer.d.ts
@@ -2084,6 +2084,30 @@ declare module "buffer" {
values(): IterableIterator<number>;
}
var Buffer: BufferConstructor;
+
+ /**
+ * This function returns `true` if `input` contains only valid UTF-8-encoded data,
+ * including the case in which `input` is empty, and `false` otherwise.
+ *
+ * @since Bun v0.6.13
+ * @param input The bytes to validate: a typed array, a `DataView`, or an array buffer.
+ * @throws {TypeError} If `input` is detached, or is not one of the accepted buffer types.
+ */
+ export function isUtf8(
+ input: TypedArray | ArrayBufferLike | DataView,
+ ): boolean;
+
+ /**
+ * This function returns `true` if `input` contains only valid ASCII-encoded data,
+ * including the case in which `input` is empty, and `false` otherwise.
+ *
+ * @since Bun v0.6.13
+ * @param input The bytes to validate: a typed array, a `DataView`, or an array buffer.
+ * @throws {TypeError} If `input` is detached, or is not one of the accepted buffer types.
+ */
+ export function isAscii(
+ input: TypedArray | ArrayBufferLike | DataView,
+ ): boolean;
}
}
declare module "node:buffer" {
diff --git a/src/bun.js/modules/BufferModule.h b/src/bun.js/modules/BufferModule.h
index a96fb18c8..6e6e39e9c 100644
--- a/src/bun.js/modules/BufferModule.h
+++ b/src/bun.js/modules/BufferModule.h
@@ -2,11 +2,127 @@
#include "../bindings/ZigGlobalObject.h"
#include "JavaScriptCore/JSGlobalObject.h"
#include "JavaScriptCore/ObjectConstructor.h"
+#include "simdutf.h"
namespace Zig {
using namespace WebCore;
using namespace JSC;
+// Host function behind `isUtf8(input)` exported from the "buffer" module.
+// TODO: Add DOMJIT fast path
+JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isUtf8,
+                         (JSC::JSGlobalObject * lexicalGlobalObject,
+                          JSC::CallFrame *callframe)) {
+  auto scope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());
+
+  // Resolved below: start and size of the byte range to validate.
+  const char *bytes = nullptr;
+  size_t length = 0;
+
+  auto input = callframe->argument(0);
+  if (auto *view = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(input)) {
+    // ArrayBufferView input: read the bytes through the view itself.
+    if (UNLIKELY(view->isDetached())) {
+      throwTypeError(lexicalGlobalObject, scope,
+                     "ArrayBufferView is detached"_s);
+      return JSValue::encode({});
+    }
+    length = view->byteLength();
+    bytes = reinterpret_cast<const char *>(view->vector());
+  } else if (auto *wrapper =
+                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(input)) {
+    // ArrayBuffer input: read the bytes through its backing implementation.
+    auto *backing = wrapper->impl();
+
+    // Without a backing implementation there is nothing to validate.
+    if (!backing) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    if (UNLIKELY(backing->isDetached())) {
+      throwTypeError(lexicalGlobalObject, scope,
+                     "ArrayBuffer is detached"_s);
+      return JSValue::encode({});
+    }
+
+    length = backing->byteLength();
+    bytes = reinterpret_cast<const char *>(backing->data());
+  } else {
+    // Any other argument is rejected with a TypeError.
+    throwVMError(
+        lexicalGlobalObject, scope,
+        createTypeError(lexicalGlobalObject,
+                        "First argument must be an ArrayBufferView"_s));
+    return JSValue::encode({});
+  }
+
+  // Empty input counts as valid, so answer without calling the validator.
+  if (length == 0) {
+    return JSValue::encode(jsBoolean(true));
+  }
+
+  RELEASE_AND_RETURN(scope, JSValue::encode(jsBoolean(
+                                simdutf::validate_utf8(bytes, length))));
+}
+
+// Host function behind `isAscii(input)` exported from the "buffer" module.
+// TODO: Add DOMJIT fast path
+JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isAscii,
+                         (JSC::JSGlobalObject * lexicalGlobalObject,
+                          JSC::CallFrame *callframe)) {
+  auto throwScope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());
+
+  auto buffer = callframe->argument(0);
+  auto *bufferView = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(buffer);
+  const char *ptr = nullptr;
+  size_t byteLength = 0;
+  if (bufferView) {
+    if (UNLIKELY(bufferView->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBufferView is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = bufferView->byteLength();
+    // Empty input is reported as ASCII without calling the validator.
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(bufferView->vector());
+  } else if (auto *arrayBuffer =
+                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(buffer)) {
+    auto *impl = arrayBuffer->impl();
+    // Null-check impl before isDetached() dereferences it (same order as isUtf8).
+    if (!impl) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    if (UNLIKELY(impl->isDetached())) {
+      throwTypeError(lexicalGlobalObject, throwScope,
+                     "ArrayBuffer is detached"_s);
+      return JSValue::encode({});
+    }
+
+    byteLength = impl->byteLength();
+    if (byteLength == 0) {
+      return JSValue::encode(jsBoolean(true));
+    }
+
+    ptr = reinterpret_cast<const char *>(impl->data());
+  } else {
+    throwVMError(
+        lexicalGlobalObject, throwScope,
+        createTypeError(lexicalGlobalObject,
+                        "First argument must be an ArrayBufferView"_s));
+    return JSValue::encode({});
+  }
+
+  RELEASE_AND_RETURN(
+      throwScope,
+      JSValue::encode(jsBoolean(simdutf::validate_ascii(ptr, byteLength))));
+}
+
JSC_DEFINE_HOST_FUNCTION(jsFunctionNotImplemented,
(JSGlobalObject * globalObject,
CallFrame *callFrame)) {
@@ -106,6 +222,20 @@ inline void generateBufferSourceCode(JSC::JSGlobalObject *lexicalGlobalObject,
exportProperty(JSC::Identifier::fromString(vm, "resolveObjectURL"_s),
resolveObjectURL);
+  // Each export reuses its own host function as the native constructor.
+  exportProperty(JSC::Identifier::fromString(vm, "isAscii"_s),
+                 JSC::JSFunction::create(vm, globalObject, 1, "isAscii"_s,
+                                         jsBufferConstructorFunction_isAscii,
+                                         ImplementationVisibility::Public,
+                                         NoIntrinsic,
+                                         jsBufferConstructorFunction_isAscii));
+  exportProperty(JSC::Identifier::fromString(vm, "isUtf8"_s),
+                 JSC::JSFunction::create(vm, globalObject, 1, "isUtf8"_s,
+                                         jsBufferConstructorFunction_isUtf8,
+                                         ImplementationVisibility::Public,
+                                         NoIntrinsic,
+                                         jsBufferConstructorFunction_isUtf8));
+
exportNames.append(vm.propertyNames->defaultKeyword);
exportValues.append(defaultObject);
}
diff --git a/test/js/node/buffer.test.js b/test/js/node/buffer.test.js
index e0d8f5486..99562fedd 100644
--- a/test/js/node/buffer.test.js
+++ b/test/js/node/buffer.test.js
@@ -1,4 +1,4 @@
-import { Buffer, SlowBuffer } from "buffer";
+import { Buffer, SlowBuffer, isAscii, isUtf8 } from "buffer";
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
import { gc } from "harness";
@@ -7,6 +7,28 @@ const BufferModule = await import("buffer");
beforeEach(() => gc());
afterEach(() => gc());
+it("isAscii", () => {
+  // Inputs made only of 7-bit bytes, as Buffer views and as a raw ArrayBuffer.
+  const ascii = [new Buffer("abc"), new Buffer(""), new Buffer("").buffer];
+  // Inputs containing at least one byte >= 0x80.
+  const highBytes = new Buffer([32, 32, 128]);
+  const nonAscii = [highBytes, new Buffer("What did the 🦊 say?"), highBytes.buffer];
+  for (const input of ascii) expect(isAscii(input)).toBeTrue();
+  for (const input of nonAscii) expect(isAscii(input)).toBeFalse();
+});
+
+it("isUtf8", () => {
+  expect(isUtf8(new Buffer("abc"))).toBeTrue();
+  expect(isUtf8(new Buffer(""))).toBeTrue(); // empty input is valid UTF-8
+  expect(isUtf8(new Buffer("What did the 🦊 say?"))).toBeTrue();
+  expect(isUtf8(new Buffer([129, 129, 129]))).toBeFalse(); // bare continuation bytes
+  // Repeat every case against the backing ArrayBuffer instead of the Buffer view.
+  expect(isUtf8(new Buffer("abc").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer("").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer("What did the 🦊 say?").buffer)).toBeTrue();
+  expect(isUtf8(new Buffer([129, 129, 129]).buffer)).toBeFalse();
+});
+
// https://github.com/oven-sh/bun/issues/2052
it("Buffer global is settable", () => {
var prevBuffer = globalThis.Buffer;