aboutsummaryrefslogtreecommitdiff
path: root/src/bun.js
diff options
context:
space:
mode:
author: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> 2022-11-21 16:52:00 -0800
committer: GitHub <noreply@github.com> 2022-11-21 16:52:00 -0800
commit: 8e36485a76d0ec3406245287c87df7111afee9bc (patch)
tree: 9ec948b465ec37edc326e20757f85ae9bc381e3b /src/bun.js
parent: 5f2e74bbccd02ae88fda723e3d4415d480f712c3 (diff)
download: bun-8e36485a76d0ec3406245287c87df7111afee9bc.tar.gz
bun-8e36485a76d0ec3406245287c87df7111afee9bc.tar.zst
bun-8e36485a76d0ec3406245287c87df7111afee9bc.zip
fix oniguruma regex character properties (#1528)
* fix for character properties
* cleanup tests
* cleanup comments
* i - 2 >= 0
Diffstat (limited to 'src/bun.js')
-rw-r--r-- src/bun.js/bindings/OnigurumaRegExp.cpp 69
1 files changed, 62 insertions, 7 deletions
diff --git a/src/bun.js/bindings/OnigurumaRegExp.cpp b/src/bun.js/bindings/OnigurumaRegExp.cpp
index 513b650f8..496a3de67 100644
--- a/src/bun.js/bindings/OnigurumaRegExp.cpp
+++ b/src/bun.js/bindings/OnigurumaRegExp.cpp
@@ -63,14 +63,17 @@ static WTF::String to16Bit(JSValue jsValue, JSC::JSGlobalObject* globalObject, A
return to16Bit(jsString, globalObject);
}
-static WTF::String extendMultibyteHexCharacters(const WTF::String& string)
+static WTF::String convertToOnigurumaSyntax(const WTF::String& string)
{
WTF::StringBuilder sb;
uint32_t length = string.length();
const UChar* characters = string.characters16();
bool inCharacterClass = false;
+ bool inCharacterProperty = false;
for (int i = 0; i < length; i++) {
+
+ // extend multibyte hex characters
while (characters[i] == '\\') {
if (i + 1 < length && characters[i + 1] == 'x') {
if (i + 2 < length && isxdigit(characters[i + 2])) {
@@ -95,6 +98,58 @@ static WTF::String extendMultibyteHexCharacters(const WTF::String& string)
break;
}
+ // convert character properties
+ if (UNLIKELY(characters[i] == '{' && i - 2 >= 0 && (characters[i - 1] == 'p' || characters[i - 1] == 'P') && characters[i - 2] == '\\')) {
+ sb.append(characters[i]);
+ i += 1;
+ if (i == length) {
+ break;
+ }
+
+ // handle negative
+ if (characters[i] == '^') {
+ sb.append(characters[i]);
+ i += 1;
+ if (i == length) {
+ break;
+ }
+ }
+
+ // could be \p{propName=propValue} or \p{propValue}.
+ bool foundEquals = false;
+ WTF::StringBuilder propName;
+ while (characters[i] != '}') {
+ if (characters[i] == '=') {
+ foundEquals = true;
+ i += 1;
+ if (i == length) {
+ break;
+ }
+ continue;
+ }
+
+ if (foundEquals) {
+ sb.append(characters[i]);
+ } else {
+ propName.append(characters[i]);
+ }
+
+ i += 1;
+ if (i == length) {
+ break;
+ }
+ }
+
+ if (!foundEquals) {
+ sb.append(propName.toString());
+ }
+ }
+
+ if (i >= length) {
+ break;
+ }
+
+ // escape brackets in character classes
if (inCharacterClass) {
// we know ']' will be escaped so there isn't a need to scan for the closing bracket
if (characters[i] == '[' || characters[i] == ']') {
@@ -518,13 +573,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob
return JSValue::encode({});
}
thisRegExp->setPatternString(regExpObject->patternString());
- patternStringExtended = extendMultibyteHexCharacters(thisRegExp->patternString());
+ patternStringExtended = convertToOnigurumaSyntax(thisRegExp->patternString());
thisRegExp->setFlagsString(regExpObject->flagsString());
} else {
WTF::String newPatternString = to16Bit(arg0, globalObject, "(?:)"_s);
RETURN_IF_EXCEPTION(scope, {});
- patternStringExtended = extendMultibyteHexCharacters(newPatternString);
+ patternStringExtended = convertToOnigurumaSyntax(newPatternString);
WTF::String newFlagsString = to16Bit(arg1, globalObject, ""_s);
RETURN_IF_EXCEPTION(scope, {});
@@ -543,7 +598,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob
// for pattern syntax checking
int errorCode = 0;
OnigErrorInfo errorInfo = { 0 };
- regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo);
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
@@ -588,7 +643,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO
int errorCode = 0;
OnigErrorInfo errorInfo = { 0 };
- regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
@@ -674,7 +729,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO
int errorCode = 0;
OnigErrorInfo errorInfo = { 0 };
- regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);
@@ -859,7 +914,7 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa
// create for pattern compilation errors, but need to create another for each exec/test
int errorCode = 0;
OnigErrorInfo errorInfo = { 0 };
- regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(patternString), flagsString, errorCode, errorInfo);
+ regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(patternString), flagsString, errorCode, errorInfo);
if (errorCode != ONIG_NORMAL) {
OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 };
int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo);