aboutsummaryrefslogtreecommitdiff
path: root/src/bun.js/bindings/OnigurumaRegExp.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/bun.js/bindings/OnigurumaRegExp.cpp')
-rw-r--r--src/bun.js/bindings/OnigurumaRegExp.cpp213
1 files changed, 108 insertions, 105 deletions
diff --git a/src/bun.js/bindings/OnigurumaRegExp.cpp b/src/bun.js/bindings/OnigurumaRegExp.cpp
index 3b6066e0f..513b650f8 100644
--- a/src/bun.js/bindings/OnigurumaRegExp.cpp
+++ b/src/bun.js/bindings/OnigurumaRegExp.cpp
@@ -97,7 +97,7 @@ static WTF::String extendMultibyteHexCharacters(const WTF::String& string)
if (inCharacterClass) {
// we know ']' will be escaped so there isn't a need to scan for the closing bracket
- if (characters[i] == '[' || characters[i] == ']' || characters[i] == '^' || characters[i] == '-' || characters[i] == ')' || characters[i] == '(') {
+ if (characters[i] == '[' || characters[i] == ']') {
if (characters[i - 1] != '\\') {
// character class intersections not supported, assume end of character class
if (characters[i] == ']') {
@@ -247,6 +247,49 @@ bool validateRegExpFlags(WTF::StringView flags)
return true;
}
+// Ensures onig_initialize() below runs only once; file-local, like its only user.
+static std::once_flag onigurumaEncodingInitFlag;
+
+// Compiles patternString with Oniguruma (UTF-16LE, ONIG_SYNTAX_ONIGURUMA) using options derived
+// from the JS flags in flagsString. onig_new()'s status is written to errorCode and its details to
+// errorInfo. The caller owns the returned regex and must onig_free() it; callers null-check it on error.
+static regex_t* createOnigurumaRegExp(JSGlobalObject* globalObject, const WTF::String& patternString, const WTF::String& flagsString, int& errorCode, OnigErrorInfo& errorInfo)
+{
+    auto& vm = globalObject->vm();
+    auto throwScope = DECLARE_THROW_SCOPE(vm);
+
+    OnigEncoding encodings[] = { ONIG_ENCODING_UTF16_LE };
+    std::call_once(onigurumaEncodingInitFlag, [&encodings]() {
+        onig_initialize(encodings, 1);
+    });
+
+    OnigOptionType options = 0;
+    if (flagsString.contains('i')) {
+        options |= ONIG_OPTION_IGNORECASE;
+    }
+    // JS 'm' only affects ^ and $. They are line anchors by default in this syntax, so it is the
+    // absence of 'm' that needs an option: ONIG_OPTION_SINGLELINE turns them into \A and \Z.
+    if (!flagsString.contains('m')) {
+        options |= ONIG_OPTION_SINGLELINE;
+    }
+    // ONIG_OPTION_MULTILINE lets '.' match newlines, which is JS 's' (dotAll), not JS 'm'.
+    if (flagsString.contains('s')) {
+        options |= ONIG_OPTION_MULTILINE;
+    }
+
+    OnigSyntaxType* syntax = ONIG_SYNTAX_ONIGURUMA;
+    OnigEncodingType* encoding = encodings[0];
+    regex_t* onigRegExp = nullptr;
+
+    // NOTE(review): characters16() presumably requires a 16-bit string; the callers in this patch
+    // pass the result of extendMultibyteHexCharacters() -- confirm it always returns one.
+    const auto* patternStart = reinterpret_cast<const OnigUChar*>(patternString.characters16());
+    const auto* patternEnd = reinterpret_cast<const OnigUChar*>(patternString.characters16() + patternString.length());
+    errorCode = onig_new(&onigRegExp, patternStart, patternEnd, options, encoding, syntax, &errorInfo);
+
+    return onigRegExp;
+}
+
class OnigurumaRegExpPrototype final : public JSC::JSNonFinalObject {
public:
using Base = JSC::JSNonFinalObject;
@@ -289,13 +332,12 @@ public:
return ptr;
}
- static OnigurumaRegEx* create(JSC::JSGlobalObject* globalObject, WTF::String&& pattern, WTF::String&& flags, regex_t* regExpCode)
+ static OnigurumaRegEx* create(JSC::JSGlobalObject* globalObject, WTF::String&& pattern, WTF::String&& flags)
{
auto* structure = reinterpret_cast<Zig::GlobalObject*>(globalObject)->OnigurumaRegExpStructure();
auto* object = create(globalObject->vm(), globalObject, structure);
object->m_flagsString = WTFMove(flags);
object->m_patternString = WTFMove(pattern);
- object->m_onigurumaRegExp = regExpCode;
return object;
}
@@ -314,18 +356,6 @@ public:
[](auto& spaces, auto&& space) { spaces.m_subspaceForOnigurumaRegExp = WTFMove(space); });
}
- static void destroy(JSC::JSCell* cell)
- {
- static_cast<OnigurumaRegEx*>(cell)->OnigurumaRegEx::~OnigurumaRegEx();
- }
-
- ~OnigurumaRegEx()
- {
- if (m_onigurumaRegExp) {
- onig_free(m_onigurumaRegExp);
- }
- }
-
static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype)
{
return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(RegExpObjectType, StructureFlags), info());
@@ -338,7 +368,6 @@ public:
const WTF::String& patternString() const { return m_patternString; }
void setPatternString(const WTF::String& patternString) { m_patternString = patternString; }
- regex_t* m_onigurumaRegExp = NULL;
int32_t m_lastIndex = 0;
private:
@@ -511,46 +540,10 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob
thisRegExp->setFlagsString(newFlagsString);
}
- OnigEncoding encodings[] = {
- ONIG_ENCODING_UTF16_LE,
- };
- onig_initialize(encodings, 1);
-
- OnigOptionType options = 0;
- if (thisRegExp->flagsString().contains('i')) {
- options |= ONIG_OPTION_IGNORECASE;
- }
- if (thisRegExp->flagsString().contains('m')) {
- options |= ONIG_OPTION_MULTILINE;
- } else {
- options |= ONIG_OPTION_SINGLELINE;
- }
- if (thisRegExp->flagsString().contains('s')) {
- options |= ONIG_OPTION_MULTILINE;
- }
-
- OnigSyntaxType* syntax = ONIG_SYNTAX_DEFAULT;
- onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_HEX2);
- onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_BRACE_HEX8);
- onig_set_syntax_op2(syntax, onig_get_syntax_op2(syntax) | ONIG_SYN_OP2_ESC_U_HEX4);
- onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC);
- onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC);
- onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) & ~ONIG_SYN_BACKSLASH_ESCAPE_IN_CC);
-
- OnigEncodingType* encoding = ONIG_ENCODING_UTF16_LE;
- OnigErrorInfo errorInfo = { 0 };
- regex_t* onigRegExp = NULL;
+ // for pattern syntax checking
int errorCode = 0;
-
- errorCode = onig_new(
- &onigRegExp,
- reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16()),
- reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16() + patternStringExtended.length()),
- options,
- encoding,
- syntax,
- &errorInfo);
-
+ OnigErrorInfo errorInfo = { 0 };
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
@@ -561,15 +554,14 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob
} else {
errorMessage.appendCharacters(errorBuff, length);
}
+ if (onigurumaRegExp != nullptr) {
+ onig_free(onigurumaRegExp);
+ }
throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString()));
return JSValue::encode({});
}
+ onig_free(onigurumaRegExp);
- if (thisRegExp->m_onigurumaRegExp) {
- onig_free(thisRegExp->m_onigurumaRegExp);
- }
-
- thisRegExp->m_onigurumaRegExp = onigRegExp;
thisRegExp->m_lastIndex = 0;
return JSValue::encode(thisRegExp);
@@ -579,6 +571,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
{
auto& vm = globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);
+ auto throwScope = DECLARE_THROW_SCOPE(vm);
auto* thisValue = jsDynamicCast<OnigurumaRegEx*>(callFrame->thisValue());
if (!thisValue)
@@ -593,6 +586,26 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
WTF::String string = to16Bit(arg, globalObject, ""_s);
RETURN_IF_EXCEPTION(scope, JSValue::encode({}));
+ int errorCode = 0;
+ OnigErrorInfo errorInfo = { 0 };
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
+ if (errorCode != ONIG_NORMAL) {
+ OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
+ int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
+ WTF::StringBuilder errorMessage;
+ errorMessage.append("Invalid regular expression: "_s);
+ if (length < 0) {
+ errorMessage.append("An unknown error occurred."_s);
+ } else {
+ errorMessage.appendCharacters(errorBuff, length);
+ }
+ if (onigurumaRegExp != nullptr) {
+ onig_free(onigurumaRegExp);
+ }
+ throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString()));
+ return JSValue::encode({});
+ }
+
OnigRegion* region = onig_region_new();
const OnigUChar* end = reinterpret_cast<const OnigUChar*>(string.characters16() + string.length());
@@ -601,12 +614,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
if (thisValue->m_lastIndex >= string.length()) {
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
thisValue->m_lastIndex = 0;
return JSValue::encode(jsBoolean(false));
}
int result = onig_search(
- thisValue->m_onigurumaRegExp,
+ onigurumaRegExp,
reinterpret_cast<const OnigUChar*>(string.characters16()),
end,
start,
@@ -617,11 +631,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
if (result < 0) {
thisValue->m_lastIndex = 0;
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
return JSValue::encode(jsBoolean(false));
}
if (thisValue->flagsString().contains('y') && region->beg[0] != thisValue->m_lastIndex) {
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
return JSValue::encode(jsBoolean(false));
}
@@ -632,6 +648,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
}
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
return JSValue::encode(jsBoolean(true));
}
@@ -655,6 +672,26 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
WTF::String string = to16Bit(arg, globalObject, ""_s);
RETURN_IF_EXCEPTION(scope, JSValue::encode({}));
+ int errorCode = 0;
+ OnigErrorInfo errorInfo = { 0 };
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
+ if (errorCode != ONIG_NORMAL) {
+ OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
+ int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
+ WTF::StringBuilder errorMessage;
+ errorMessage.append("Invalid regular expression: "_s);
+ if (length < 0) {
+ errorMessage.append("An unknown error occurred."_s);
+ } else {
+ errorMessage.appendCharacters(errorBuff, length);
+ }
+ if (onigurumaRegExp != nullptr) {
+ onig_free(onigurumaRegExp);
+ }
+ throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString()));
+ return JSValue::encode({});
+ }
+
OnigRegion* region = onig_region_new();
const OnigUChar* end = reinterpret_cast<const OnigUChar*>(string.characters16() + string.length());
@@ -662,7 +699,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
const OnigUChar* range = end;
int result = onig_search(
- thisValue->m_onigurumaRegExp,
+ onigurumaRegExp,
reinterpret_cast<const OnigUChar*>(string.characters16()),
end,
start,
@@ -672,6 +709,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
if (result < 0) {
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
thisValue->m_lastIndex = 0;
return JSValue::encode(jsNull());
}
@@ -694,6 +732,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
if (UNLIKELY(!ptr)) {
throwOutOfMemoryError(globalObject, scope);
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
return JSValue::encode(jsNull());
}
@@ -720,6 +759,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
}
onig_region_free(region, 1);
+ onig_free(onigurumaRegExp);
return JSValue::encode(array);
}
@@ -759,6 +799,7 @@ void OnigurumaRegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObje
this->putDirectCustomAccessor(vm, vm.propertyNames->source, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterSource, nullptr), 0 | PropertyAttribute::CustomAccessor | PropertyAttribute::ReadOnly);
this->putDirectCustomAccessor(vm, vm.propertyNames->flags, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterFlags, nullptr), 0 | PropertyAttribute::CustomAccessor | PropertyAttribute::ReadOnly);
this->putDirectCustomAccessor(vm, vm.propertyNames->lastIndex, JSC::CustomGetterSetter::create(vm, onigurumaRegExpProtoGetterLastIndex, onigurumaRegExpProtoSetterLastIndex), 0 | PropertyAttribute::CustomAccessor);
+
this->putDirectNativeFunction(vm, globalObject, PropertyName(vm.propertyNames->test), 1, onigurumaRegExpProtoFuncTest, ImplementationVisibility::Public, NoIntrinsic, static_cast<unsigned>(0));
this->putDirectBuiltinFunction(vm, globalObject, vm.propertyNames->matchSymbol, onigurumaRegExpPrototypeMatchCodeGenerator(vm), static_cast<unsigned>(0));
@@ -805,8 +846,6 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa
WTF::String patternString = to16Bit(arg0, globalObject, "(?:)"_s);
RETURN_IF_EXCEPTION(scope, {});
- WTF::String patternStringExtended = extendMultibyteHexCharacters(patternString);
-
WTF::String flagsString = to16Bit(arg1, globalObject, ""_s);
RETURN_IF_EXCEPTION(scope, {});
@@ -817,45 +856,10 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa
flagsString = sortRegExpFlags(flagsString);
- OnigEncoding encodings[] = {
- ONIG_ENCODING_UTF16_LE,
- };
- onig_initialize(encodings, 1);
-
- OnigOptionType options = 0;
- if (flagsString.contains('i')) {
- options |= ONIG_OPTION_IGNORECASE;
- }
- if (flagsString.contains('m')) {
- options |= ONIG_OPTION_MULTILINE;
- } else {
- options |= ONIG_OPTION_SINGLELINE;
- }
- if (flagsString.contains('s')) {
- options |= ONIG_OPTION_MULTILINE;
- }
-
- OnigSyntaxType* syntax = ONIG_SYNTAX_ONIGURUMA;
- onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_HEX2);
- onig_set_syntax_op(syntax, onig_get_syntax_op(syntax) | ONIG_SYN_OP_ESC_X_BRACE_HEX8);
- onig_set_syntax_op2(syntax, onig_get_syntax_op2(syntax) | ONIG_SYN_OP2_ESC_U_HEX4);
- onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC);
- onig_set_syntax_behavior(syntax, onig_get_syntax_behavior(syntax) | ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC);
-
- OnigEncodingType* encoding = encodings[0];
- OnigErrorInfo errorInfo = { 0 };
- regex_t* onigRegExp = NULL;
+ // create for pattern compilation errors, but need to create another for each exec/test
int errorCode = 0;
-
- errorCode = onig_new(
- &onigRegExp,
- reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16()),
- reinterpret_cast<const OnigUChar*>(patternStringExtended.characters16() + patternStringExtended.length()),
- options,
- encoding,
- syntax,
- &errorInfo);
-
+ OnigErrorInfo errorInfo = { 0 };
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(patternString), flagsString, errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
@@ -869,10 +873,9 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa
throwScope.throwException(globalObject, createSyntaxError(globalObject, errorMessage.toString()));
return JSValue::encode({});
}
+ onig_free(onigurumaRegExp);
- RETURN_IF_EXCEPTION(scope, {});
-
- OnigurumaRegEx* result = OnigurumaRegEx::create(globalObject, WTFMove(patternString), WTFMove(flagsString), onigRegExp);
+ OnigurumaRegEx* result = OnigurumaRegEx::create(globalObject, WTFMove(patternString), WTFMove(flagsString));
return JSValue::encode(result);
}