Jackie-Jiang commented on code in PR #16276:
URL: https://github.com/apache/pinot/pull/16276#discussion_r2208472521
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeConstFunctions.java:
##########
@@ -30,16 +32,45 @@
public class RegexpLikeConstFunctions {
private Matcher _matcher;
+ private String _currentPattern;
+ private String _currentMatchParameter;
@ScalarFunction
public boolean regexpLike(String inputStr, String regexPatternStr) {
- if (_matcher == null) {
- _matcher = PatternFactory.compile(regexPatternStr).matcher("");
+ return regexpLike(inputStr, regexPatternStr, "c"); // Default
case-sensitive
Review Comment:
This is much more commonly used. Let's not add the extra parameter parsing
logic to this method to reduce overhead
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeConstFunctions.java:
##########
@@ -30,16 +32,45 @@
public class RegexpLikeConstFunctions {
private Matcher _matcher;
+ private String _currentPattern;
+ private String _currentMatchParameter;
@ScalarFunction
public boolean regexpLike(String inputStr, String regexPatternStr) {
- if (_matcher == null) {
- _matcher = PatternFactory.compile(regexPatternStr).matcher("");
+ return regexpLike(inputStr, regexPatternStr, "c"); // Default
case-sensitive
+ }
+
+ @ScalarFunction
+ public boolean regexpLike(String inputStr, String regexPatternStr, String
matchParameter) {
+ if (_matcher == null || !_currentPattern.equals(regexPatternStr) ||
!Objects.equals(_currentMatchParameter,
+ matchParameter)) {
+ _matcher = buildPattern(regexPatternStr, matchParameter).matcher("");
+ _currentPattern = regexPatternStr;
+ _currentMatchParameter = matchParameter;
}
return _matcher.reset(inputStr).find();
}
+ private Pattern buildPattern(String pattern, String matchParameter) {
+ if (matchParameter != null) {
+ for (char c : matchParameter.toCharArray()) {
+ switch (c) {
+ case 'i':
+ return PatternFactory.compileCaseInsensitive(pattern);
+ case 'c':
+ return PatternFactory.compile(pattern);
+ default:
+ // Invalid character - default to case-sensitive
+ return PatternFactory.compile(pattern);
Review Comment:
This only checks the first parameter. We probably want to throw exception if
it is not `i` or `c` since we don't support other parameters
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeConstFunctions.java:
##########
@@ -30,16 +32,45 @@
public class RegexpLikeConstFunctions {
private Matcher _matcher;
+ private String _currentPattern;
+ private String _currentMatchParameter;
@ScalarFunction
public boolean regexpLike(String inputStr, String regexPatternStr) {
- if (_matcher == null) {
- _matcher = PatternFactory.compile(regexPatternStr).matcher("");
+ return regexpLike(inputStr, regexPatternStr, "c"); // Default
case-sensitive
+ }
+
+ @ScalarFunction
+ public boolean regexpLike(String inputStr, String regexPatternStr, String
matchParameter) {
+ if (_matcher == null || !_currentPattern.equals(regexPatternStr) ||
!Objects.equals(_currentMatchParameter,
+ matchParameter)) {
Review Comment:
Do we need this check? All the invocations should have the same arguments
##########
pinot-common/src/main/java/org/apache/pinot/sql/FilterKind.java:
##########
@@ -32,8 +32,7 @@ public enum FilterKind {
RANGE,
IN,
NOT_IN,
- LIKE,
- REGEXP_LIKE,
+ LIKE, REGEXP_LIKE,
Review Comment:
Revert
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeConstFunctions.java:
##########
@@ -30,16 +32,45 @@
public class RegexpLikeConstFunctions {
private Matcher _matcher;
+ private String _currentPattern;
+ private String _currentMatchParameter;
@ScalarFunction
public boolean regexpLike(String inputStr, String regexPatternStr) {
- if (_matcher == null) {
- _matcher = PatternFactory.compile(regexPatternStr).matcher("");
+ return regexpLike(inputStr, regexPatternStr, "c"); // Default
case-sensitive
+ }
+
+ @ScalarFunction
+ public boolean regexpLike(String inputStr, String regexPatternStr, String
matchParameter) {
+ if (_matcher == null || !_currentPattern.equals(regexPatternStr) ||
!Objects.equals(_currentMatchParameter,
+ matchParameter)) {
+ _matcher = buildPattern(regexPatternStr, matchParameter).matcher("");
+ _currentPattern = regexPatternStr;
+ _currentMatchParameter = matchParameter;
}
return _matcher.reset(inputStr).find();
}
+ private Pattern buildPattern(String pattern, String matchParameter) {
+ if (matchParameter != null) {
+ for (char c : matchParameter.toCharArray()) {
+ switch (c) {
+ case 'i':
+ return PatternFactory.compileCaseInsensitive(pattern);
+ case 'c':
+ return PatternFactory.compile(pattern);
+ default:
+ // Invalid character - default to case-sensitive
+ return PatternFactory.compile(pattern);
Review Comment:
Check if upper case should be supported
##########
pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RegexpLikePredicate.java:
##########
@@ -24,15 +24,21 @@
import org.apache.pinot.common.utils.regex.PatternFactory;
/**
- * Predicate for REGEXP_LIKE.
+ * Predicate for REGEXP_LIKE with optional match parameters (Oracle-style).
*/
public class RegexpLikePredicate extends BasePredicate {
private final String _value;
+ private final String _matchParameter;
private Pattern _pattern = null;
public RegexpLikePredicate(ExpressionContext lhs, String value) {
+ this(lhs, value, "c"); // Default case-sensitive
Review Comment:
Short circuit the more common case
##########
pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/RegexpLikePredicate.java:
##########
@@ -24,15 +24,21 @@
import org.apache.pinot.common.utils.regex.PatternFactory;
/**
- * Predicate for REGEXP_LIKE.
+ * Predicate for REGEXP_LIKE with optional match parameters (Oracle-style).
*/
public class RegexpLikePredicate extends BasePredicate {
private final String _value;
+ private final String _matchParameter;
private Pattern _pattern = null;
public RegexpLikePredicate(ExpressionContext lhs, String value) {
+ this(lhs, value, "c"); // Default case-sensitive
+ }
+
+ public RegexpLikePredicate(ExpressionContext lhs, String value, String
matchParameter) {
super(lhs);
_value = value;
+ _matchParameter = matchParameter != null ? matchParameter : "c";
Review Comment:
`matchParameter` will never be `null`
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeConstFunctions.java:
##########
@@ -30,16 +32,45 @@
public class RegexpLikeConstFunctions {
private Matcher _matcher;
+ private String _currentPattern;
+ private String _currentMatchParameter;
@ScalarFunction
public boolean regexpLike(String inputStr, String regexPatternStr) {
- if (_matcher == null) {
- _matcher = PatternFactory.compile(regexPatternStr).matcher("");
+ return regexpLike(inputStr, regexPatternStr, "c"); // Default
case-sensitive
+ }
+
+ @ScalarFunction
+ public boolean regexpLike(String inputStr, String regexPatternStr, String
matchParameter) {
+ if (_matcher == null || !_currentPattern.equals(regexPatternStr) ||
!Objects.equals(_currentMatchParameter,
+ matchParameter)) {
+ _matcher = buildPattern(regexPatternStr, matchParameter).matcher("");
+ _currentPattern = regexPatternStr;
+ _currentMatchParameter = matchParameter;
}
return _matcher.reset(inputStr).find();
}
+ private Pattern buildPattern(String pattern, String matchParameter) {
+ if (matchParameter != null) {
Review Comment:
`matchParameter` will never be `null`
##########
pinot-common/src/main/java/org/apache/pinot/common/function/scalar/regexp/RegexpLikeVarFunctions.java:
##########
@@ -33,10 +34,34 @@ private RegexpLikeVarFunctions() {
@ScalarFunction
public static boolean regexpLikeVar(String inputStr, String regexPatternStr)
{
- Pattern pattern = Pattern.compile(regexPatternStr, Pattern.UNICODE_CASE |
Pattern.CASE_INSENSITIVE);
+ return regexpLikeVar(inputStr, regexPatternStr, "c"); // Default
case-sensitive
Review Comment:
Same comments for this class
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]