diff options
author | 2022-11-21 16:52:00 -0800 | |
---|---|---|
committer | 2022-11-21 16:52:00 -0800 | |
commit | 8e36485a76d0ec3406245287c87df7111afee9bc (patch) | |
tree | 9ec948b465ec37edc326e20757f85ae9bc381e3b /src/bun.js | |
parent | 5f2e74bbccd02ae88fda723e3d4415d480f712c3 (diff) | |
download | bun-8e36485a76d0ec3406245287c87df7111afee9bc.tar.gz bun-8e36485a76d0ec3406245287c87df7111afee9bc.tar.zst bun-8e36485a76d0ec3406245287c87df7111afee9bc.zip |
fix oniguruma regex character properties (#1528)
* fix for character properties
* cleanup tests
* cleanup comments
* i - 2 >= 0
Diffstat (limited to 'src/bun.js')
-rw-r--r-- | src/bun.js/bindings/OnigurumaRegExp.cpp | 69 |
1 file changed, 62 insertions, 7 deletions
diff --git a/src/bun.js/bindings/OnigurumaRegExp.cpp b/src/bun.js/bindings/OnigurumaRegExp.cpp index 513b650f8..496a3de67 100644 --- a/src/bun.js/bindings/OnigurumaRegExp.cpp +++ b/src/bun.js/bindings/OnigurumaRegExp.cpp @@ -63,14 +63,17 @@ static WTF::String to16Bit(JSValue jsValue, JSC::JSGlobalObject* globalObject, A return to16Bit(jsString, globalObject); } -static WTF::String extendMultibyteHexCharacters(const WTF::String& string) +static WTF::String convertToOnigurumaSyntax(const WTF::String& string) { WTF::StringBuilder sb; uint32_t length = string.length(); const UChar* characters = string.characters16(); bool inCharacterClass = false; + bool inCharacterProperty = false; for (int i = 0; i < length; i++) { + + // extend multibyte hex characters while (characters[i] == '\\') { if (i + 1 < length && characters[i + 1] == 'x') { if (i + 2 < length && isxdigit(characters[i + 2])) { @@ -95,6 +98,58 @@ static WTF::String extendMultibyteHexCharacters(const WTF::String& string) break; } + // convert character properties + if (UNLIKELY(characters[i] == '{' && i - 2 >= 0 && (characters[i - 1] == 'p' || characters[i - 1] == 'P') && characters[i - 2] == '\\')) { + sb.append(characters[i]); + i += 1; + if (i == length) { + break; + } + + // handle negative + if (characters[i] == '^') { + sb.append(characters[i]); + i += 1; + if (i == length) { + break; + } + } + + // could be \p{propName=propValue} or \p{propValue}. 
+ bool foundEquals = false; + WTF::StringBuilder propName; + while (characters[i] != '}') { + if (characters[i] == '=') { + foundEquals = true; + i += 1; + if (i == length) { + break; + } + continue; + } + + if (foundEquals) { + sb.append(characters[i]); + } else { + propName.append(characters[i]); + } + + i += 1; + if (i == length) { + break; + } + } + + if (!foundEquals) { + sb.append(propName.toString()); + } + } + + if (i >= length) { + break; + } + + // escape brackets in character classes if (inCharacterClass) { // we know ']' will be escaped so there isn't a need to scan for the closing bracket if (characters[i] == '[' || characters[i] == ']') { @@ -518,13 +573,13 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob return JSValue::encode({}); } thisRegExp->setPatternString(regExpObject->patternString()); - patternStringExtended = extendMultibyteHexCharacters(thisRegExp->patternString()); + patternStringExtended = convertToOnigurumaSyntax(thisRegExp->patternString()); thisRegExp->setFlagsString(regExpObject->flagsString()); } else { WTF::String newPatternString = to16Bit(arg0, globalObject, "(?:)"_s); RETURN_IF_EXCEPTION(scope, {}); - patternStringExtended = extendMultibyteHexCharacters(newPatternString); + patternStringExtended = convertToOnigurumaSyntax(newPatternString); WTF::String newFlagsString = to16Bit(arg1, globalObject, ""_s); RETURN_IF_EXCEPTION(scope, {}); @@ -543,7 +598,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncCompile, (JSGlobalObject * glob // for pattern syntax checking int errorCode = 0; OnigErrorInfo errorInfo = { 0 }; - regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo); + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisRegExp->patternString()), thisRegExp->flagsString(), errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar 
errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); @@ -588,7 +643,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncTest, (JSGlobalObject * globalO int errorCode = 0; OnigErrorInfo errorInfo = { 0 }; - regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); @@ -674,7 +729,7 @@ JSC_DEFINE_HOST_FUNCTION(onigurumaRegExpProtoFuncExec, (JSGlobalObject * globalO int errorCode = 0; OnigErrorInfo errorInfo = { 0 }; - regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(thisValue->patternString()), thisValue->flagsString(), errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); @@ -859,7 +914,7 @@ static JSC::EncodedJSValue constructOrCall(Zig::GlobalObject* globalObject, JSVa // create for pattern compilation errors, but need to create another for each exec/test int errorCode = 0; OnigErrorInfo errorInfo = { 0 }; - regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, extendMultibyteHexCharacters(patternString), flagsString, errorCode, errorInfo); + regex_t* onigurumaRegExp = createOnigurumaRegExp(globalObject, convertToOnigurumaSyntax(patternString), flagsString, errorCode, errorInfo); if (errorCode != ONIG_NORMAL) { OnigUChar 
errorBuff[ONIG_MAX_ERROR_MESSAGE_LEN] = { 0 }; int length = onig_error_code_to_str(errorBuff, errorCode, &errorInfo); |