diff options
Diffstat (limited to '')
-rw-r--r-- | src/bun.js/bindings/OnigurumaRegExp.cpp | 213 | ||||
-rw-r--r-- | src/bun.js/bindings/OnigurumaRegExp.h | 17 | ||||
-rw-r--r-- | test/bun.js/oniguruma-regexp.test.ts | 282 |
3 files changed, 353 insertions, 159 deletions
diff --git a/src/bun.js/bindings/OnigurumaRegExp.cpp b/src/bun.js/bindings/OnigurumaRegExp.cpp index 3b6066e0f..513b650f8 100644 --- a/src/bun.js/bindings/OnigurumaRegExp.cpp +++ b/src/bun.js/bindings/OnigurumaRegExp.cpp @@ -97,7 +97,7 @@ static WTF::String extendMultibyteHexCharacters(const WTF::String& string) if (inCharacterClass) { // we know ']' will be escaped so there isn't a need to scan for the closing bracket - if (characters[i] == '[' || characters[i] == ']' || characters[i] == '^' || characters[i] == '-' || characters[i] == ')' || characters[i] == '(') { + if (characters[i] == '[' || characters[i] == ']') { if (characters[i - 1] != '\\') { // character class intersections not supported, assume end of character class if (characters[i] == ']') { @@ -247,6 +247,49 @@ bool validateRegExpFlags(WTF::StringView flags) return true; } +std::once_flag onigurumaEncodingInitFlag; + +static regex_t* createOnigurumaRegExp(JSGlobalObject* globalObject, const WTF::String& patternString, const WTF::String& flagsString, int& errorCode, OnigErrorInfo& errorInfo) +{ + auto& vm = globalObject->vm(); + auto throwScope = DECLARE_THROW_SCOPE(vm); + + OnigEncoding encodings[] = { + ONIG_ENCODING_UTF16_LE, + }; + std::call_once(onigurumaEncodingInitFlag, [&encodings]() { + onig_initialize(encodings, 1); + }); + + OnigOptionType options = 0; + if (flagsString.contains('i')) { + options |= ONIG_OPTION_IGNORECASE; + } + if (flagsString.contains('m')) { + options |= ONIG_OPTION_MULTILINE; + } else { + options |= ONIG_OPTION_SINGLELINE; + } + if (flagsString.contains('s')) { + options |= ONIG_OPTION_MULTILINE; + } + + OnigSyntaxType* syntax = ONIG_SYNTAX_ONIGURUMA; + OnigEncodingType* encoding = encodings[0]; + regex_t* onigRegExp = NULL; + + errorCode = onig_new( + &onigRegExp, + reinterpret_cast<const OnigUChar*>(patternString.characters16()), + reinterpret_cast<const OnigUChar*>(patternString.characters16() + patternString.length()), + options, + encoding, + syntax, + &errorInfo); + + return onigRegExp; +} + class OnigurumaRegExpPrototype final : public JSC::JSNonFinalObject { public: using Base = JSC::JSNonFinalObject; @@ -289,13 +332,12 @@ public: return ptr; } - static OnigurumaRegEx* create(JSC::JSGlobalObject* globalObject, WTF::String&& pattern, WTF::String&& flags, regex_t* regExpCode) + static OnigurumaRegEx* create(JSC::JSGlobalObject* globalObject, WTF::String&& pattern, WTF::String&& flags) { auto* structure = reinterpret_cast<Zig::GlobalObject*>(globalObject)->OnigurumaRegExpStructure(); auto* object = create(globalObject->vm(), globalObject, structure); object->m_flagsString = WTFMove(flags); object->m_patternString = WTFMove(pattern); - object->m_onigurumaRegExp = regExpCode; return object; } @@ -314,18 +356,6 @@ public: [](auto& spaces, auto&& space) { spaces.m_subspaceForOnigurumaRegExp = WTFMove(space); }); } - static void destroy(JSC::JSCell* cell) - { - static_cast<OnigurumaRegEx*>(cell)->OnigurumaRegEx::~OnigurumaRegEx(); - } - - ~OnigurumaRegEx() - { - if (m_onigurumaRegExp) { - onig_free(m_onigurumaRegExp); - } - } - static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) { return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(RegExpObjectType, StructureFlags), info()); @@ -338,7 +368,6 @@ public: const WTF::String& patternString() const { return m_patternString; } void setPatternString(const WTF::String& patternString) { m_patternString = patternString; } - regex_t* m_onigurumaRegExp = NULL; int32_t m_lastIndex = 0; private: @@ -511,46 +540,10 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob thisRegExp->setFlagsString(newFlagsString); } - OnigEncoding encodings[] = { - ONIG_ENCODING_UTF16_LE, - }; - onig_initialize(encodings, 1); - - OnigOptionType options = 0; - if (thisRegExp->flagsString().contains('i')) { - options |= ONIG_OPTION_IGNORECASE; - } - if (thisRegExp->flagsString().contains('m')) { - options |= ONIG_OPTION_MULTILINE; - } else { - options |= ONIG_OPTION_SINGLELINE; - } - if (thisRegExp->flagsString().contains('s')) { - options |= ONIG_OPTION_MULTILINE; - } - - OnigSyntaxType* syntax = ONIG_SYNTAX_DEFAULT; - onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_HEX2); - onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_BRACE_HEX8); - onig_set_syntax_op2(syntax, onig_get_syntax_op2(syntax) | ONIG_SYN_OP2_ESC_U_HEX4); - onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC); - onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC); - onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) & ~ONIG_SYN_BACKSLASH_ESCAPE_IN_CC); - - OnigEncodingType* encoding = ONIG_ENCODING_UTF16_LE; - OnigErrorInfo errorInfo = { 0 }; - regex_t* onigRegExp = NULL; + // for pattern syntax checking int errorCode = 0; - - errorCode = onig_new( - &onigRegExp, - reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16()), - reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16() + patternStringExtended.length()), - options, - encoding, - syntax, - &errorInfo); - + OnigErrorInfo errorInfo = { 0 }; + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); @@ -561,15 +554,14 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob } else { errorMessage.appendCharacters(errorBuff, length); } + if (onigurumaRegExp != nullptr) { + onig_free(onigurumaRegExp); + } throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString())); return JSValue::encode({}); } + onig_free(onigurumaRegExp); - if (thisRegExp->m_onigurumaRegExp) { - onig_free(thisRegExp->m_onigurumaRegExp); - } - - thisRegExp->m_onigurumaRegExp = onigRegExp; thisRegExp->m_lastIndex = 0; return JSValue::encode(thisRegExp); @@ -579,6 +571,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO { auto& vm = globalObject->vm(); auto scope = DECLARE_THROW_SCOPE(vm); + auto throwScope = DECLARE_THROW_SCOPE(vm); auto* thisValue = jsDynamicCast<OnigurumaRegEx*>(callFrame->thisValue()); if (!thisValue) @@ -593,6 +586,26 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO WTF::String string = to16Bit(arg, globalObject, ""_s); RETURN_IF_EXCEPTION(scope, JSValue::encode({})); + int errorCode = 0; + OnigErrorInfo errorInfo = { 0 }; + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); + if (errorCode != ONIG_NORMAL) { + OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; + int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); + WTF::StringBuilder errorMessage; + errorMessage.append("Invalid regular expression: "_s); + if (length < 0) { + errorMessage.append("An unknown error occurred."_s); + } else { + errorMessage.appendCharacters(errorBuff, length); + } + if (onigurumaRegExp != nullptr) { + onig_free(onigurumaRegExp); + } + throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString())); + return JSValue::encode({}); + } + OnigRegion* region = onig_region_new(); const OnigUChar* end = reinterpret_cast<const OnigUChar*>(string.characters16() + string.length()); @@ -601,12 +614,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO if (thisValue->m_lastIndex >= string.length()) { onig_region_free(region, 1); + onig_free(onigurumaRegExp); thisValue->m_lastIndex = 0; return JSValue::encode(jsBoolean(false)); } int result = onig_search( - thisValue->m_onigurumaRegExp, + onigurumaRegExp, reinterpret_cast<const OnigUChar*>(string.characters16()), end, start, @@ -617,11 +631,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO if (result < 0) { thisValue->m_lastIndex = 0; onig_region_free(region, 1); + onig_free(onigurumaRegExp); return JSValue::encode(jsBoolean(false)); } if (thisValue->flagsString().contains('y') && region->beg[0] != thisValue->m_lastIndex) { onig_region_free(region, 1); + onig_free(onigurumaRegExp); return JSValue::encode(jsBoolean(false)); } @@ -632,6 +648,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO } onig_region_free(region, 1); + onig_free(onigurumaRegExp); return JSValue::encode(jsBoolean(true)); } @@ -655,6 +672,26 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO WTF::String string = to16Bit(arg, globalObject, ""_s); RETURN_IF_EXCEPTION(scope, JSValue::encode({})); + int errorCode = 0; + OnigErrorInfo errorInfo = { 0 }; + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); + if (errorCode != ONIG_NORMAL) { + OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; + int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); + WTF::StringBuilder errorMessage; + errorMessage.append("Invalid regular expression: "_s); + if (length < 0) { + errorMessage.append("An unknown error occurred."_s); + } else { + errorMessage.appendCharacters(errorBuff, length); + } + if (onigurumaRegExp != nullptr) { + onig_free(onigurumaRegExp); + } + throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString())); + return JSValue::encode({}); + } + OnigRegion* region = onig_region_new(); const OnigUChar* end = reinterpret_cast<const OnigUChar*>(string.characters16() + string.length()); @@ -662,7 +699,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO const OnigUChar* range = end; int result = onig_search( - thisValue->m_onigurumaRegExp, + onigurumaRegExp, reinterpret_cast<const OnigUChar*>(string.characters16()), end, start, @@ -672,6 +709,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO if (result < 0) { onig_region_free(region, 1); + onig_free(onigurumaRegExp); thisValue->m_lastIndex = 0; return JSValue::encode(jsNull()); } @@ -694,6 +732,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO if (UNLIKELY(!ptr)) { throwOutOfMemoryError(globalObject, scope); onig_region_free(region, 1); + onig_free(onigurumaRegExp); return JSValue::encode(jsNull()); } @@ -720,6 +759,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO } onig_region_free(region, 1); + onig_free(onigurumaRegExp); return JSValue::encode(array); } @@ -759,6 +799,7 @@ void OnigurumaRegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObje this->putDirectCustomAccessor(vm, vm.propertyNames->source, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterSource, nullptr), 0 | PropertyAttribute::CustomAccessor | PropertyAttribute::ReadOnly); this->putDirectCustomAccessor(vm, vm.propertyNames->flags, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterFlags, nullptr), 0 | PropertyAttribute::CustomAccessor | PropertyAttribute::ReadOnly); this->putDirectCustomAccessor(vm, vm.propertyNames->lastIndex, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterLastIndex, onigurumaRegExpProtoSetterLastIndex), 0 | PropertyAttribute::CustomAccessor); + this->putDirectNativeFunction(vm, globalObject, PropertyName(vm.propertyNames->test), 1, onigurumaRegExpProtoFuncTest, ImplementationVisibility::Public, NoIntrinsic, static_cast<unsigned>(0)); this->putDirectBuiltinFunction(vm, globalObject, vm.propertyNames->matchSymbol, onigurumaRegExpPrototypeMatchCodeGenerator(vm), static_cast<unsigned>(0)); @@ -805,8 +846,6 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa WTF::String patternString = to16Bit(arg0, globalObject, "(?:)"_s); RETURN_IF_EXCEPTION(scope, {}); - WTF::String patternStringExtended = extendMultibyteHexCharacters(patternString); - WTF::String flagsString = to16Bit(arg1, globalObject, ""_s); RETURN_IF_EXCEPTION(scope, {}); @@ -817,45 +856,10 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa flagsString = sortRegExpFlags(flagsString); - OnigEncoding encodings[] = { - ONIG_ENCODING_UTF16_LE, - }; - onig_initialize(encodings, 1); - - OnigOptionType options = 0; - if (flagsString.contains('i')) { - options |= ONIG_OPTION_IGNORECASE; - } - if (flagsString.contains('m')) { - options |= ONIG_OPTION_MULTILINE; - } else { - options |= ONIG_OPTION_SINGLELINE; - } - if (flagsString.contains('s')) { - options |= ONIG_OPTION_MULTILINE; - } - - OnigSyntaxType* syntax = ONIG_SYNTAX_ONIGURUMA; - onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_HEX2); - onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_BRACE_HEX8); - onig_set_syntax_op2(syntax, onig_get_syntax_op2(syntax) | ONIG_SYN_OP2_ESC_U_HEX4); - onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC); - onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC); - - OnigEncodingType* encoding = encodings[0]; - OnigErrorInfo errorInfo = { 0 }; - regex_t* onigRegExp = NULL; + // create for pattern compilation errors, but need to create another for each exec/test int errorCode = 0; - - errorCode = onig_new( - &onigRegExp, - reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16()), - reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16() + patternStringExtended.length()), - options, - encoding, - syntax, - &errorInfo); - + OnigErrorInfo errorInfo = { 0 }; + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(patternString), flagsString, errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); @@ -869,10 +873,9 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString())); return JSValue::encode({}); } + onig_free(onigurumaRegExp); - RETURN_IF_EXCEPTION(scope, {}); - - OnigurumaRegEx* result = OnigurumaRegEx::create(globalObject, WTFMove(patternString), WTFMove(flagsString), onigRegExp); + OnigurumaRegEx* result = OnigurumaRegEx::create(globalObject, WTFMove(patternString), WTFMove(flagsString)); return JSValue::encode(result); } diff --git a/src/bun.js/bindings/OnigurumaRegExp.h b/src/bun.js/bindings/OnigurumaRegExp.h index 846ca6357..058492844 100644 --- a/src/bun.js/bindings/OnigurumaRegExp.h +++ b/src/bun.js/bindings/OnigurumaRegExp.h @@ -4,9 +4,8 @@ #include "BunBuiltinNames.h" #include "BunClientData.h" #include "ZigGlobalObject.h" -#include "JavaScriptCore/JSDestructibleObjectHeapCellType.h" -extern "C" JSC::EncodedJSValue jsFunctionGetOnigurumaRegExpConstructor(JSC::JSGlobalObject * lexicalGlobalObject, JSC::EncodedJSValue thisValue, JSC::PropertyName attributeName); +extern "C" JSC::EncodedJSValue jsFunctionGetOnigurumaRegExpConstructor(JSC::JSGlobalObject* lexicalGlobalObject, JSC::EncodedJSValue thisValue, JSC::PropertyName attributeName); namespace Zig { @@ -24,24 +23,20 @@ public: static JSC::Structure* createClassStructure(JSC::JSGlobalObject*, JSC::JSValue prototype); static JSC::JSObject* createPrototype(JSC::JSGlobalObject*); - + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) { return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(JSC::InternalFunctionType, StructureFlags), info()); } - private: OnigurumaRegExpConstructor(JSC::VM& vm, JSC::Structure* structure, JSC::NativeFunction nativeFunction) - : Base(vm, structure, nativeFunction, nativeFunction) - - { - } - + : Base(vm, structure, nativeFunction, nativeFunction) + + { + } void finishCreation(JSC::VM&, JSValue prototype); }; - - }
\ No newline at end of file diff --git a/test/bun.js/oniguruma-regexp.test.ts b/test/bun.js/oniguruma-regexp.test.ts index 48906fb7d..372e8eb99 100644 --- a/test/bun.js/oniguruma-regexp.test.ts +++ b/test/bun.js/oniguruma-regexp.test.ts @@ -2,10 +2,186 @@ import { OnigurumaRegExp } from "bun"; import { expect, it, test } from "bun:test"; import { gc as gcTrace } from "./gc"; +it("repeated match and exec calls", () => { + for (let i = 0; i < 20000; i++) { + let r1 = new OnigurumaRegExp("//.+?/[^?]+", "sg"); + let r2 = new RegExp("//.+?/[^?]+", "sg"); + let s1 = "https://dylan-conway.com/profile"; + expect(s1.match(r1)[0] === s1.match(r2)[0]).toBe(true); + expect(r1.exec(s1)[0] === r2.exec(s1)[0]).toBe(true); + } +}); + +it("repeated match and exec calls no global flag", () => { + for (let i = 0; i < 20000; i++) { + let r1 = new OnigurumaRegExp("//.+?/[^?]+", "s"); + let r2 = new RegExp("//.+?/[^?]+", "s"); + let s1 = "https://dylan-conway.com/profile"; + expect(r1.exec(s1)[0] === r2.exec(s1)[0]).toBe(true); + expect(s1.match(r1)[0] === s1.match(r2)[0]).toBe(true); + } +}); + +const rb1 = new OnigurumaRegExp("//.+?/[^?]+", "s"); +const rb2 = new RegExp("//.+?/[^?]+", "s"); +it("repeated match calls with global regex without global flag", () => { + for (let i = 0; i < 20000; i++) { + let s1 = "https://dylan-conway.com/profile"; + expect(rb1.exec(s1)[0] === rb2.exec(s1)[0]).toBe(true); + expect(s1.match(rb1)[0] === s1.match(rb2)[0]).toBe(true); + } +}); + it("escaped characters in character classes", () => { + expect( + new RegExp("[a-z]").exec("a")[0] === + new OnigurumaRegExp("[a-z]").exec("a")[0] + ).toBe(true); + + Bun.gc(true); + + expect( + new RegExp("[a-z]").exec("b")[0] === + new OnigurumaRegExp("[a-z]").exec("b")[0] + ).toBe(true); + + Bun.gc(true); + + expect(new RegExp("[a-zA-Z0-9_]+").exec("B9")[0]).toBe( + new OnigurumaRegExp("[a-zA-Z0-9_]+").exec("B9")[0] + ); + Bun.gc(true); + expect(new RegExp("[a-z]").exec("-")).toBe(null); + Bun.gc(true); + expect(new OnigurumaRegExp("[a-z]").exec("-")).toBe(null); + Bun.gc(true); + expect(new RegExp("[a\\-z]").exec("-")[0]).toBe("-"); + Bun.gc(true); + expect(new OnigurumaRegExp("[a\\-z]").exec("-")[0]).toBe("-"); + Bun.gc(true); + expect(new RegExp("[a\\-z]").exec("a")[0]).toBe("a"); + Bun.gc(true); + expect(new OnigurumaRegExp("[a\\-z]").exec("a")[0]).toBe("a"); + Bun.gc(true); + expect(new RegExp("[a\\-z]").exec("z")[0]).toBe("z"); + Bun.gc(true); + expect(new OnigurumaRegExp("[a\\-z]").exec("z")[0]).toBe("z"); + Bun.gc(true); + expect(new RegExp("[a\\-z]").exec("b")).toBe(null); + Bun.gc(true); + expect(new OnigurumaRegExp("[a\\-z]").exec("b")).toBe(null); + Bun.gc(true); + + expect(new RegExp("[^b-c]").exec("a")[0]).toBe( + new OnigurumaRegExp("[^b-c]").exec("a")[0] + ); + Bun.gc(true); + + expect(new RegExp("[\\^b-c]").exec("a")).toBe(null); + expect(new OnigurumaRegExp("[\\^b-c]").exec("a")).toBe(null); + Bun.gc(true); + + expect(new RegExp("[\\^b-c]").exec("^c")[0]).toBe("^"); + expect(new OnigurumaRegExp("[\\^b-c]").exec("^c")[0]).toBe("^"); + Bun.gc(true); + + expect(new RegExp("[a^b-c]").exec("a^")[0]).toBe("a"); + expect(new OnigurumaRegExp("[a^b-c]").exec("a^")[0]).toBe("a"); + Bun.gc(true); + + expect(new RegExp("[\\\\]").exec("\\")[0]).toBe("\\"); + expect(new OnigurumaRegExp("[\\\\]").exec("\\")[0]).toBe("\\"); + Bun.gc(true); + + let p = "//.+?[^?]+"; + let s = "https://dylan-conway.com/profile"; + const b1 = new RegExp(p, "gs"); + const b2 = new OnigurumaRegExp(p, "gs"); + Bun.gc(true); + expect(s.match(b1)[0] === s.match(b2)[0]).toBe(true); + Bun.gc(true); + + const b3 = new RegExp("[\\^]"); + const b4 = new OnigurumaRegExp("[\\^]"); + Bun.gc(true); + expect( + "https://dylan-co^nway.com/profile".match(b3)[0] === + "https://dylan-co^nway.com/profile".match(b4)[0] + ).toBe(true); + Bun.gc(true); + + // prettier-ignore + p = "\/\/.+?\/[^?]+"; + s = "https://dylan-conway.com/profile"; + Bun.gc(true); + expect(s.match(new OnigurumaRegExp(p, "gs"))[0]).toBe( + new RegExp(p, "gs").exec(s)[0] + ); + Bun.gc(true); + + // middle no escape + p = "[.i^e]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // middle with escape + p = "[.i\\^e]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // end with escape + p = "[.ie\\^]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // end no escape + p = "[.ie^]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // start no escape + p = "[^.ie]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // start with escape + p = "[\\^.ie]+"; + s = "https://dylan-co^nway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // middle with escape + p = "[.i\\-e]+"; + s = "https://dylan-conway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // end with escape + p = "[.ie\\-]+"; + s = "https://dylan-conway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // end no escape + p = "[.ie-]+"; + s = "https://dylan-conway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // start no escape + p = "[-.ie]+"; + s = "https://dylan-conway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + + // start with escape + p = "[\\-.ie]+"; + s = "https://dylan-conway.com/profile"; + expect(new OnigurumaRegExp(p).exec(s)[0]).toBe(new RegExp(p).exec(s)[0]); + let r1 = new RegExp("^([[(]-?[\\d]+)?,?(-?[\\d]+[\\])])?$").exec("(1,1]"); - let r2 = new OnigurumaRegExp("^([[(]-?[\\d]+)?,?(-?[\\d]+[\\])])?$").exec("(1,1]"); + Bun.gc(true); + let r2 = new OnigurumaRegExp("^([[(]-?[\\d]+)?,?(-?[\\d]+[\\])])?$").exec( + "(1,1]" + ); + Bun.gc(true); expect(r1[0]).toBe(r2[0]); + Bun.gc(true); let r3 = new RegExp("[\\d],[\\d]").exec("1,2"); let r4 = new OnigurumaRegExp("[\\d],[\\d]").exec("1,2"); @@ -54,15 +230,15 @@ it("escaped characters in character classes", () => { let r31 = new RegExp("[[\\]][[\\]]").exec("]]"); let r32 = new OnigurumaRegExp("[[\\]][[\\]]").exec("]]"); expect(r31[0]).toBe(r32[0]); - + let r33 = new RegExp("[\\]][\\]]").exec("]]"); let r34 = new OnigurumaRegExp("[\\]][\\]]").exec("]]"); expect(r33[0]).toBe(r34[0]); - + let r35 = new RegExp("[a-z&&[^aeiou]").exec("a"); let r36 = new OnigurumaRegExp("[a-z&&[^aeiou]").exec("a"); expect(r35[0]).toBe(r36[0]); - + let r37 = new RegExp("[a-z&&[^aeiou]]").exec("a]"); let r38 = new OnigurumaRegExp("[a-z&&[^aeiou]]").exec("a]"); expect(r37[0]).toBe(r38[0]); @@ -107,9 +283,8 @@ test("OnigurumaRegExp.prototype.exec() 2", () => { }); test("OnigurumaRegExp.prototype.exec() 3", () => { - let a22 = new OnigurumaRegExp("\\x9\\x5e", "gd"); - let a22_1 = a22.exec("table fox9\^otball, fox9\^osball"); + let a22_1 = a22.exec("table fox9^otball, fox9^osball"); expect(a22_1[0]).toBe("x9^"); let a1 = new OnigurumaRegExp("x3\\x5e", "gd"); @@ -130,12 +305,12 @@ test("OnigurumaRegExp.prototype.exec() 3", () => { test("OnigurumaRegExp.prototype.exec() 4", () => { let a2 = new RegExp("\\x3\\x5e", "gd"); - let a2_1 = a2.exec("table fox3\^otball, fox3\^osball"); - a2_1 = a2.exec("table fox3\^otball, fox3\^osball"); + let a2_1 = a2.exec("table fox3^otball, fox3^osball"); + a2_1 = a2.exec("table fox3^otball, fox3^osball"); for (const RegExpConstructor of [OnigurumaRegExp, RegExp]) { let a2 = new RegExpConstructor("\\x3\\x5e", "gd"); - let a2_1 = a2.exec("table fox3\^otball, fox3\^osball"); + let a2_1 = a2.exec("table fox3^otball, fox3^osball"); expect(a2_1[0]).toBe("x3^"); expect(new RegExpConstructor("\\x3").source).toBe("\\x3"); @@ -144,22 +319,30 @@ test("OnigurumaRegExp.prototype.exec() 4", () => { expect(new RegExpConstructor("j\\x3\\x2").source).toBe("j\\x3\\x2"); expect(new RegExpConstructor("\\x3\\x5\\j").source).toBe("\\x3\\x5\\j"); expect(new RegExpConstructor("\\x3\\x7\\xa").source).toBe("\\x3\\x7\\xa"); - expect(new RegExpConstructor("\\j323\\x7\\xa").source).toBe("\\j323\\x7\\xa"); + expect(new RegExpConstructor("\\j323\\x7\\xa").source).toBe( + "\\j323\\x7\\xa" + ); expect(new RegExpConstructor("\\x56").test("V")).toBe(true); } }); test("OnigurumaRegExp.prototype.test()", () => { - expect(new RegExp("\\\\(?![*+?^${}(|)[\\]])", "g").test('\\')).toBe(true); - expect(new OnigurumaRegExp("\\\\(?![*+?^${}(|)[\\]])", "g").test('\\')).toBe(true); + expect(new RegExp("\\\\(?![*+?^${}(|)[\\]])", "g").test("\\")).toBe(true); + expect(new OnigurumaRegExp("\\\\(?![*+?^${}(|)[\\]])", "g").test("\\")).toBe( + true + ); expect(new RegExp("\\x56").test("V")).toBe(true); expect(new OnigurumaRegExp("\\x56").test("V")).toBe(true); - expect(new RegExp('//').compile('\\\\(?![*+?^${}(|)[\\]])', 'g').test('\\')).toBe(true); - let r = new OnigurumaRegExp('//'); - expect(r.compile('\\\\(?![*+?^${}(|)[\\]])', 'g').test('\\')).toBe(true); - expect(new OnigurumaRegExp('').compile('\\\\(?![*+?^${}(|)[\\]])', 'g').test('\\')).toBe(true); + expect( + new RegExp("//").compile("\\\\(?![*+?^${}(|)[\\]])", "g").test("\\") + ).toBe(true); + let r = new OnigurumaRegExp("//"); + expect(r.compile("\\\\(?![*+?^${}(|)[\\]])", "g").test("\\")).toBe(true); + expect( + new OnigurumaRegExp("").compile("\\\\(?![*+?^${}(|)[\\]])", "g").test("\\") + ).toBe(true); }); test("OnigurumaRegExp flag order", () => { @@ -234,16 +417,15 @@ test("OnigurumaRegExp flags", () => { expect(/a/g.toString()).toBe("/a/g"); expect(/a/y.toString()).toBe("/a/y"); expect(/a/m.toString()).toBe("/a/m"); - expect(/a/sg.toString()).toBe("/a/gs"); - expect(/a/ys.toString()).toBe("/a/sy"); + expect(/a/gs.toString()).toBe("/a/gs"); + expect(/a/sy.toString()).toBe("/a/sy"); expect(/a/gm.toString()).toBe("/a/gm"); - expect(/a/sgy.toString()).toBe("/a/gsy"); - expect(/a/sgm.toString()).toBe("/a/gms"); - expect(/a/ymg.toString()).toBe("/a/gmy"); + expect(/a/gsy.toString()).toBe("/a/gsy"); + expect(/a/gms.toString()).toBe("/a/gms"); + expect(/a/gmy.toString()).toBe("/a/gmy"); // expect(/a/d.toString()).toBe("/a/d"); // expect(/a/dgimsuy.toString()).toBe("/a/dgimsuy"); - // case insensitive option for (const RegExpConstructor of [OnigurumaRegExp, RegExp]) { expect(new RegExpConstructor("Is ThIs SqL?").test("IS THIS SQL?")).toBe( @@ -526,31 +708,35 @@ it("String.prototype.split", () => { it("escapes characters, unicode, and hex", () => { for (const RegExpConstructor of [OnigurumaRegExp, RegExp]) { - expect(new RegExpConstructor("[\\x00-\\x1F]").toString()).toBe("/[\\x00-\\x1F]/"); - expect(new RegExpConstructor("[\\u0000-\\u001F]").toString()).toBe("/[\\u0000-\\u001F]/"); - var s = /\\x{7HHHHHHH}(?=\\u{1233})/ - let a = new RegExpConstructor('\u{0001F46E}'); - expect(a.exec('👮')[0]).toBe('👮'); + expect(new RegExpConstructor("[\\x00-\\x1F]").toString()).toBe( + "/[\\x00-\\x1F]/" + ); + expect(new RegExpConstructor("[\\u0000-\\u001F]").toString()).toBe( + "/[\\u0000-\\u001F]/" + ); + var s = /\\x{7HHHHHHH}(?=\\u{1233})/; + let a = new RegExpConstructor("\u{0001F46E}"); + expect(a.exec("👮")[0]).toBe("👮"); } - let y = new OnigurumaRegExp('[👮\\x7F](?<=👮)'); - expect(y.exec('👮\\x{7F}')[0]).toBe('👮'); + let y = new OnigurumaRegExp("[👮\\x7F](?<=👮)"); + expect(y.exec("👮\\x{7F}")[0]).toBe("👮"); - let by = new OnigurumaRegExp('[👮\\cx7f](?<=👮)'); - expect(y.exec('👮\\x{7F}')[0]).toBe('👮'); + let by = new OnigurumaRegExp("[👮\\cx7f](?<=👮)"); + expect(y.exec("👮\\x{7F}")[0]).toBe("👮"); - let bz = new OnigurumaRegExp('[👮\\x7](?<=👮)'); + let bz = new OnigurumaRegExp("[👮\\x7](?<=👮)"); - let d = new OnigurumaRegExp('[\u{0001F46E}\x7F](?<=\u{0001F46E})'); - expect(d.exec('👮\x7F')[0]).toBe('👮'); + let d = new OnigurumaRegExp("[\u{0001F46E}\x7F](?<=\u{0001F46E})"); + expect(d.exec("👮\x7F")[0]).toBe("👮"); let y_2 = /[[👮\x7F](?<=👮)]/; - expect(y_2.exec('👮\x7F')[0]).toBe('👮'); + expect(y_2.exec("👮]")[0]).toBe("👮]"); let a1 = new OnigurumaRegExp("(f\xf3oo)", "gd"); let a1_1 = a1.exec("table f\xf3ootball, f\xf3oosball"); a1_1 = a1.exec("table f\xf3ootball, f\xf3oosball"); - + let a2 = new RegExp("(f\xf3oo)", "dg"); let a2_1 = a2.exec("table f\xf3ootball, f\xf3oosball"); a2_1 = a2.exec("table f\xf3ootball, f\xf3oosball"); @@ -580,17 +766,27 @@ it("lookbehinds", () => { let small = /(?:)/; expect(small instanceof OnigurumaRegExp).toBe(false); - expect(/[\x00-\x1F\x27\x5C\x7F-\x9F]|[\uD800-\uDBFF]\(?<=[\uDC00-\uDFFF]\)|(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof RegExp).toBe(true); - expect(/[\x00-\x1F\x27\x5C\x7F-\x9F]|[\uD800-\uDBFF](?<=[\uDC00-\uDFFF])|(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof OnigurumaRegExp).toBe(true); + expect( + /[\x00-\x1F\x27\x5C\x7F-\x9F]|[\uD800-\uDBFF]\(?<=[\uDC00-\uDFFF]\)|(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof + RegExp + ).toBe(true); + expect( + /[\x00-\x1F\x27\x5C\x7F-\x9F]|[\uD800-\uDBFF](?<=[\uDC00-\uDFFF])|(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof + OnigurumaRegExp + ).toBe(true); expect(/(?<=\1d(o))/ instanceof OnigurumaRegExp).toBe(true); expect(/\(?<=\1d(o)\)/ instanceof OnigurumaRegExp).toBe(false); - expect(/(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof RegExp).toBe(true); - expect(/\(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]\).*[\uDC00-\uDFFF]/ instanceof RegExp).toBe(true); + expect( + /(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]).*[\uDC00-\uDFFF]/ instanceof RegExp + ).toBe(true); + expect( + /\(?!.*[\uD800-\uDBFF][\uDC00-\uDFFF]\).*[\uDC00-\uDFFF]/ instanceof RegExp + ).toBe(true); - let e = new OnigurumaRegExp('\(?<=\)'); - expect(e.source).toBe('(?<=)'); - expect(new OnigurumaRegExp('(?<=)').source).toBe('(?<=)'); + let e = new OnigurumaRegExp("(?<=)"); + expect(e.source).toBe("(?<=)"); + expect(new OnigurumaRegExp("(?<=)").source).toBe("(?<=)"); expect(/\(?<=\)/.source).toBe("\\(?<=\\)"); expect(/(?<=)/.source).toBe("(?<=)"); |