This is an automated email from the ASF dual-hosted git repository.
virajjasani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/master by this push:
new 5d14dec569 PHOENIX-7276 Add REGEXP_LIKE built-in function (#2398)
5d14dec569 is described below
commit 5d14dec56959018b7c63bb7a0b9848392ed53e60
Author: Xavier Fernandis <[email protected]>
AuthorDate: Thu Apr 23 10:34:20 2026 +0530
PHOENIX-7276 Add REGEXP_LIKE built-in function (#2398)
---
docs/phoenix.csv | 16 +
.../apache/phoenix/expression/ExpressionType.java | 5 +-
.../function/ByteBasedRegexpLikeFunction.java | 59 +++
.../expression/function/RegexpLikeFunction.java | 244 +++++++++++
.../function/StringBasedRegexpLikeFunction.java | 47 +++
.../apache/phoenix/parse/RegexpLikeParseNode.java | 53 +++
.../phoenix/end2end/RegexpLikeFunctionIT.java | 452 +++++++++++++++++++++
.../function/RegexpLikeFunctionTest.java | 241 +++++++++++
8 files changed, 1116 insertions(+), 1 deletion(-)
diff --git a/docs/phoenix.csv b/docs/phoenix.csv
index 6ccf09882b..1679892a0e 100644
--- a/docs/phoenix.csv
+++ b/docs/phoenix.csv
@@ -1840,6 +1840,22 @@ REGEXP_SPLIT('ONE,TWO,THREE', ',') evaluates to
ARRAY['ONE', 'TWO', 'THREE']
REGEXP_SPLIT('ONE!#TWO#,!THREE', '[,!#]+') evaluates to ARRAY['ONE', 'TWO',
'THREE']
"
+"Functions (String)","REGEXP_LIKE","
+REGEXP_LIKE( stringTerm, patternString [, matchParameterString ] )
+","
+Tests whether a string matches a regular expression pattern and returns true or false.
+Uses full-match semantics: the entire string must match the pattern.
+The optional matchParameterString controls matching behavior:
+'i' for case-insensitive, 'c' for case-sensitive (default),
+'m' for multiline mode, 's' for dotall mode (dot matches newline).
+If both 'i' and 'c' are specified, the last one wins.
+","
+REGEXP_LIKE('Hello World', 'Hello.*') evaluates to true
+REGEXP_LIKE('Hello World', 'Hello') evaluates to false (full match required)
+REGEXP_LIKE('Hello World', 'hello.*', 'i') evaluates to true (case-insensitive)
+REGEXP_LIKE('abc123', '.*\\d+.*') evaluates to true
+"
+
"Functions (General)","MD5","
MD5( term )
","
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/ExpressionType.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/ExpressionType.java
index 2491c71a2d..6b280cf9de 100644
---
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/ExpressionType.java
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/ExpressionType.java
@@ -208,7 +208,10 @@ public enum ExpressionType {
ScanEndKeyFunction(ScanEndKeyFunction.class),
TotalSegmentsFunction(TotalSegmentsFunction.class),
RowSizeFunction(RowSizeFunction.class),
- RawRowSizeFunction(RawRowSizeFunction.class);
+ RawRowSizeFunction(RawRowSizeFunction.class),
+ RegexpLikeFunction(RegexpLikeFunction.class),
+ ByteBasedRegexpLikeFunction(ByteBasedRegexpLikeFunction.class),
+ StringBasedRegexpLikeFunction(StringBasedRegexpLikeFunction.class);
ExpressionType(Class<? extends Expression> clazz) {
this.clazz = clazz;
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/ByteBasedRegexpLikeFunction.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/ByteBasedRegexpLikeFunction.java
new file mode 100644
index 0000000000..50fcb95982
--- /dev/null
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/ByteBasedRegexpLikeFunction.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import java.util.List;
+import java.util.regex.Pattern;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.util.regex.AbstractBasePattern;
+import org.apache.phoenix.expression.util.regex.JONIPattern;
+import org.apache.phoenix.parse.FunctionParseNode.Argument;
+import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction;
+import org.apache.phoenix.parse.FunctionParseNode.FunctionClassType;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.joni.Option;
+
+/**
+ * {@link RegexpLikeFunction} variant that compiles its pattern with the byte-based JONI engine.
+ */
+@BuiltInFunction(name = RegexpLikeFunction.NAME,
+  args = { @Argument(allowedTypes = { PVarchar.class }),
+    @Argument(allowedTypes = { PVarchar.class }),
+    @Argument(allowedTypes = { PVarchar.class }, defaultValue = "null") },
+  classType = FunctionClassType.DERIVED)
+public class ByteBasedRegexpLikeFunction extends RegexpLikeFunction {
+
+  public ByteBasedRegexpLikeFunction() {
+  }
+
+  public ByteBasedRegexpLikeFunction(List<Expression> children) {
+    super(children);
+  }
+
+  /**
+   * Compiles {@code value} as a JONI pattern after translating {@link Pattern} flag bits into the
+   * corresponding JONI {@link Option} bits.
+   * <p>
+   * The option names do not line up one-to-one between the two engines:
+   * <ul>
+   * <li>JONI {@code Option.MULTILINE} means "'.' also matches newline", i.e. Java
+   * {@link Pattern#DOTALL}.</li>
+   * <li>JONI {@code Option.SINGLELINE} restricts '^'/'$' to the ends of the input, which is the
+   * opposite of Java {@link Pattern#MULTILINE}; line-boundary anchors are requested with
+   * {@code Option.NEGATE_SINGLE_LINE}.</li>
+   * </ul>
+   * @param value     the regular expression source
+   * @param javaFlags bitmask of {@link Pattern} flags as produced by
+   *                  {@code RegexpLikeFunction.parseMatchParameter}
+   * @return the compiled JONI-backed pattern
+   */
+  @Override
+  protected AbstractBasePattern compilePatternSpec(String value, int javaFlags) {
+    int joniFlags = 0;
+    if ((javaFlags & Pattern.CASE_INSENSITIVE) != 0) {
+      joniFlags |= Option.IGNORECASE;
+    }
+    if ((javaFlags & Pattern.MULTILINE) != 0) {
+      // Clears the single-line anchoring that Java-style syntax turns on by default, so '^' and
+      // '$' match at line boundaries.
+      // NOTE(review): presumes JONIPattern compiles with a syntax whose default enables
+      // SINGLELINE (e.g. Syntax.Java) - confirm; the option is harmless otherwise.
+      joniFlags |= Option.NEGATE_SINGLE_LINE;
+    }
+    if ((javaFlags & Pattern.DOTALL) != 0) {
+      // JONI spells "dot matches newline" as MULTILINE.
+      joniFlags |= Option.MULTILINE;
+    }
+    return new JONIPattern(value, joniFlags);
+  }
+}
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/RegexpLikeFunction.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/RegexpLikeFunction.java
new file mode 100644
index 0000000000..94978f129f
--- /dev/null
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/RegexpLikeFunction.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.util.List;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Determinism;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.util.regex.AbstractBasePattern;
+import org.apache.phoenix.parse.FunctionParseNode;
+import org.apache.phoenix.parse.FunctionParseNode.Argument;
+import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction;
+import org.apache.phoenix.parse.RegexpLikeParseNode;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.schema.types.PVarchar;
+
+/**
+ * Function similar to Oracle's REGEXP_LIKE, which tests whether a string
matches a regular
+ * expression pattern. Usage: {@code REGEXP_LIKE(<source_char>, <pattern> [,
<match_parameter>]) }
+ * <p>
+ * source_char is the string to search. pattern is a Java compatible regular
expression string.
+ * match_parameter is an optional string of flags that modify matching
behavior:
+ * <ul>
+ * <li>'i' - case-insensitive matching</li>
+ * <li>'c' - case-sensitive matching (default)</li>
+ * <li>'m' - multiline mode (^ and $ match line boundaries). Note: with
full-match semantics, 'm'
+ * has limited use on its own; it is most useful combined with 's'. May become
independently useful
+ * if partial-match semantics are adopted in the future.</li>
+ * <li>'s' - dotall mode (. matches any character including newline)</li>
+ * </ul>
+ * <p>
+ * <b>Differences from LIKE expression:</b>
+ * <ul>
+ * <li><b>Pattern syntax:</b> REGEXP_LIKE uses full Java regular expressions,
while LIKE uses simple
+ * wildcard patterns with '%' (zero or more characters) and '_' (single
character)</li>
+ * <li><b>Complexity:</b> REGEXP_LIKE supports complex pattern matching
including character classes,
+ * quantifiers, anchors, and groups, while LIKE is limited to basic wildcard
matching</li>
+ * <li><b>Flags:</b> REGEXP_LIKE supports multiple matching flags (i/c/m/s),
while LIKE only
+ * supports case-sensitive/case-insensitive modes</li>
+ * <li><b>Performance:</b> LIKE is typically faster for simple patterns, while
REGEXP_LIKE provides
+ * more flexibility at the cost of potential performance overhead</li>
+ * <li><b>Examples:</b> {@code LIKE: column LIKE 'test%'} vs {@code
REGEXP_LIKE(column, '^test.*')}
+ * {@code LIKE: column LIKE 't_st'} vs {@code REGEXP_LIKE(column,
'^t.st$')}</li>
+ * </ul>
+ * The function returns a {@link org.apache.phoenix.schema.types.PBoolean}.
+ * @since 5.3
+ */
+@BuiltInFunction(name = RegexpLikeFunction.NAME, nodeClass =
RegexpLikeParseNode.class,
+ args = { @Argument(allowedTypes = { PVarchar.class }),
+ @Argument(allowedTypes = { PVarchar.class }),
+ @Argument(allowedTypes = { PVarchar.class }, defaultValue = "null") },
+ classType = FunctionParseNode.FunctionClassType.ABSTRACT,
+ derivedFunctions = { ByteBasedRegexpLikeFunction.class,
StringBasedRegexpLikeFunction.class })
+public abstract class RegexpLikeFunction extends ScalarFunction {
+ public static final String NAME = "REGEXP_LIKE";
+
+ private static final PVarchar TYPE = PVarchar.INSTANCE;
+ private AbstractBasePattern pattern;
+
+ public RegexpLikeFunction() {
+ }
+
+ public RegexpLikeFunction(List<Expression> children) {
+ super(children);
+ init();
+ }
+
+ protected abstract AbstractBasePattern compilePatternSpec(String value, int
flags);
+
+ /**
+ * Parse the match_parameter string into regex flags. Subclasses translate
these into
+ * implementation-specific flag values.
+ * @param matchParameter the match parameter string (e.g. "im", "cs")
+ * @return a bitmask of standard Java regex flags
+ */
+ static int parseMatchParameter(String matchParameter) {
+ int flags = 0;
+ if (matchParameter == null || matchParameter.isEmpty()) {
+ return flags;
+ }
+ for (int i = 0; i < matchParameter.length(); i++) {
+ char c = matchParameter.charAt(i);
+ switch (c) {
+ case 'i':
+ // Enable case-insensitive matching. If 'c' appears later, it will
override this.
+ flags |= java.util.regex.Pattern.CASE_INSENSITIVE;
+ break;
+ case 'c':
+ // Enable case-sensitive matching (default). Overrides a preceding
'i'.
+ // Per Oracle semantics, if both 'i' and 'c' are specified, the last
one wins.
+ flags &= ~java.util.regex.Pattern.CASE_INSENSITIVE;
+ break;
+ case 'm':
+ // Enable multiline mode: ^ and $ match at line boundaries, not just
+ // the start and end of the entire string.
+ // NOTE: With the current full-match semantics, 'm' has limited
practical use
+ // on its own because the entire string must match the pattern
regardless.
+ // It becomes more meaningful when combined with 's' (dotall) and .*
wrappers,
+ // e.g., REGEXP_LIKE(val, '.*^ERROR.*$.*', 'ms').
+ // If REGEXP_LIKE is changed to partial-match semantics in the
future, 'm' will
+ // become independently useful (e.g., REGEXP_LIKE(val, '^ERROR',
'm') would
+ // find 'ERROR' at the start of any line).
+ // Users can also use (?m) inline in the pattern as an alternative.
+ flags |= java.util.regex.Pattern.MULTILINE;
+ break;
+ case 's':
+ // Enable dotall mode: the '.' metacharacter matches any character
+ // including newline characters.
+ flags |= java.util.regex.Pattern.DOTALL;
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid match_parameter
character '" + c
+ + "' in REGEXP_LIKE. Valid values are 'i', 'c', 'm', 's'.");
+ }
+ }
+ return flags;
+ }
+
+ private void init() {
+ ImmutableBytesWritable tmpPtr = new ImmutableBytesWritable();
+ Expression patternExpr = getPatternExpression();
+ if (
+ patternExpr.isStateless() && patternExpr.getDeterminism() ==
Determinism.ALWAYS
+ && patternExpr.evaluate(null, tmpPtr)
+ ) {
+ String patternStr =
+ (String) TYPE.toObject(tmpPtr, patternExpr.getDataType(),
patternExpr.getSortOrder());
+ if (patternStr != null) {
+ int flags = resolveFlags(tmpPtr);
+ pattern = compilePatternSpec(patternStr, flags);
+ }
+ }
+ }
+
+ /**
+ * Resolve the regex flags from the optional match_parameter argument.
+ */
+ private int resolveFlags(ImmutableBytesWritable tmpPtr) {
+ if (children.size() <= 2) {
+ return 0;
+ }
+ Expression matchParamExpr = getMatchParameterExpression();
+ if (
+ matchParamExpr.isStateless() && matchParamExpr.getDeterminism() ==
Determinism.ALWAYS
+ && matchParamExpr.evaluate(null, tmpPtr)
+ ) {
+ String matchParam =
+ (String) TYPE.toObject(tmpPtr, matchParamExpr.getDataType(),
matchParamExpr.getSortOrder());
+ return parseMatchParameter(matchParam);
+ }
+ return 0;
+ }
+
+ @Override
+ public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) {
+ AbstractBasePattern pattern = this.pattern;
+ if (pattern == null) {
+ Expression patternExpr = getPatternExpression();
+ if (!patternExpr.evaluate(tuple, ptr)) {
+ return false;
+ }
+ if (ptr.getLength() == 0) {
+ return true;
+ }
+ String patternStr =
+ (String) TYPE.toObject(ptr, patternExpr.getDataType(),
patternExpr.getSortOrder());
+ if (patternStr == null) {
+ return false;
+ }
+ // Resolve flags at evaluation time if not pre-compiled
+ int flags = 0;
+ if (children.size() > 2) {
+ Expression matchParamExpr = getMatchParameterExpression();
+ if (!matchParamExpr.evaluate(tuple, ptr)) {
+ return false;
+ }
+ String matchParam =
+ (String) TYPE.toObject(ptr, matchParamExpr.getDataType(),
matchParamExpr.getSortOrder());
+ flags = parseMatchParameter(matchParam);
+ }
+ pattern = compilePatternSpec(patternStr, flags);
+ }
+
+ Expression sourceExpr = getSourceExpression();
+ if (!sourceExpr.evaluate(tuple, ptr)) {
+ return false;
+ }
+ if (ptr.getLength() == 0) {
+ return true;
+ }
+ TYPE.coerceBytes(ptr, TYPE, sourceExpr.getSortOrder(), SortOrder.ASC);
+
+ pattern.matches(ptr);
+ return true;
+ }
+
+ private Expression getSourceExpression() {
+ return children.get(0);
+ }
+
+ private Expression getPatternExpression() {
+ return children.get(1);
+ }
+
+ private Expression getMatchParameterExpression() {
+ return children.get(2);
+ }
+
+ @Override
+ public PDataType getDataType() {
+ return PBoolean.INSTANCE;
+ }
+
+ @Override
+ public void readFields(DataInput input) throws IOException {
+ super.readFields(input);
+ init();
+ }
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+}
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/StringBasedRegexpLikeFunction.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/StringBasedRegexpLikeFunction.java
new file mode 100644
index 0000000000..a97493d471
--- /dev/null
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/expression/function/StringBasedRegexpLikeFunction.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import java.util.List;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.util.regex.AbstractBasePattern;
+import org.apache.phoenix.expression.util.regex.JavaPattern;
+import org.apache.phoenix.parse.FunctionParseNode.Argument;
+import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction;
+import org.apache.phoenix.parse.FunctionParseNode.FunctionClassType;
+import org.apache.phoenix.schema.types.PVarchar;
+
+@BuiltInFunction(name = RegexpLikeFunction.NAME,
+ args = { @Argument(allowedTypes = { PVarchar.class }),
+ @Argument(allowedTypes = { PVarchar.class }),
+ @Argument(allowedTypes = { PVarchar.class }, defaultValue = "null") },
+ classType = FunctionClassType.DERIVED)
+public class StringBasedRegexpLikeFunction extends RegexpLikeFunction {
+
+ public StringBasedRegexpLikeFunction() {
+ }
+
+ public StringBasedRegexpLikeFunction(List<Expression> children) {
+ super(children);
+ }
+
+ @Override
+ protected AbstractBasePattern compilePatternSpec(String value, int flags) {
+ return new JavaPattern(value, flags);
+ }
+}
diff --git
a/phoenix-core-client/src/main/java/org/apache/phoenix/parse/RegexpLikeParseNode.java
b/phoenix-core-client/src/main/java/org/apache/phoenix/parse/RegexpLikeParseNode.java
new file mode 100644
index 0000000000..217cf0797e
--- /dev/null
+++
b/phoenix-core-client/src/main/java/org/apache/phoenix/parse/RegexpLikeParseNode.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.parse;
+
+import java.sql.SQLException;
+import java.util.List;
+import org.apache.phoenix.compile.StatementContext;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.function.ByteBasedRegexpLikeFunction;
+import org.apache.phoenix.expression.function.RegexpLikeFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpLikeFunction;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.query.QueryServicesOptions;
+
+/**
+ * Parse node for {@link RegexpLikeFunction}. Besides representing the call in the parse tree, it
+ * is the factory that picks the concrete function class based on the
+ * QueryServices.USE_BYTE_BASED_REGEX_ATTRIB setting.
+ */
+public class RegexpLikeParseNode extends FunctionParseNode {
+
+  RegexpLikeParseNode(String name, List<ParseNode> children, BuiltInFunctionInfo info) {
+    super(name, children, info);
+  }
+
+  /**
+   * Builds the byte-based or string-based REGEXP_LIKE expression, depending on the connection's
+   * query-services property.
+   * @param children compiled argument expressions
+   * @param context  statement context used to reach the connection's query services
+   * @return the function expression wrapping {@code children}
+   * @throws SQLException declared by the overridden method
+   */
+  @Override
+  public Expression create(List<Expression> children, StatementContext context)
+    throws SQLException {
+    QueryServices queryServices = context.getConnection().getQueryServices();
+    boolean byteBased = queryServices.getProps().getBoolean(
+      QueryServices.USE_BYTE_BASED_REGEX_ATTRIB, QueryServicesOptions.DEFAULT_USE_BYTE_BASED_REGEX);
+    return byteBased
+      ? new ByteBasedRegexpLikeFunction(children)
+      : new StringBasedRegexpLikeFunction(children);
+  }
+}
diff --git
a/phoenix-core/src/it/java/org/apache/phoenix/end2end/RegexpLikeFunctionIT.java
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/RegexpLikeFunctionIT.java
new file mode 100644
index 0000000000..9d8860d4bc
--- /dev/null
+++
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/RegexpLikeFunctionIT.java
@@ -0,0 +1,452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(ParallelStatsDisabledTest.class)
+public class RegexpLikeFunctionIT extends ParallelStatsDisabledIT {
+
+ private String tableName;
+ private int id;
+
+ @Before
+ public void setup() throws Exception {
+ tableName = generateUniqueName();
+ Connection conn = DriverManager.getConnection(getUrl());
+ conn.createStatement()
+ .execute("CREATE TABLE " + tableName + " (ID VARCHAR NOT NULL PRIMARY
KEY, VAL VARCHAR)");
+ insertRow(conn, "Hello World");
+ insertRow(conn, "hello world");
+ insertRow(conn, "Report123");
+ insertRow(conn, "Test456");
+ insertRow(conn, "line1\nline2");
+ insertRow(conn, null);
+ conn.commit();
+ conn.close();
+ }
+
+  /**
+   * Upserts one row whose key is "id" followed by the running counter, then advances the counter.
+   * @param conn open connection; the caller is responsible for committing
+   * @param val  value for the VAL column, may be {@code null}
+   * @throws SQLException if the upsert fails
+   */
+  private void insertRow(Connection conn, String val) throws SQLException {
+    // Close the statement after use instead of leaving it open until the connection closes.
+    try (PreparedStatement stmt =
+      conn.prepareStatement("UPSERT INTO " + tableName + " (ID, VAL) VALUES (?, ?)")) {
+      stmt.setString(1, "id" + id);
+      stmt.setString(2, val);
+      stmt.executeUpdate();
+    }
+    id++;
+  }
+
+ @Test
+ public void testBasicMatch() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Match rows starting with 'Hello' — full match requires .* at end
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'Hello.*')
ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testNoMatch() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'^ZZZZZ')");
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCaseInsensitiveFlag() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // With 'i' flag, should match both 'Hello World' and 'hello world'
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'hello.*', 'i')
ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1));
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCaseSensitiveDefault() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Without flag, case-sensitive: only lowercase 'hello' matches
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'hello.*')
ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCaseSensitiveFlagOverride() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // 'ic' means last one wins: 'c' overrides 'i', so case-sensitive
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'hello.*',
'ic') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testDigitPattern() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Match rows that are entirely digits
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, '.*\\d+.*')
ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1)); // Report123
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1)); // Test456
+ // id4 has newline — '.' does not match newline by default, so it won't
match
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testInSelectList() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Use REGEXP_LIKE in SELECT list — should return boolean (full match)
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID, REGEXP_LIKE(VAL, 'Report.*') FROM " +
tableName + " ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1));
+ assertFalse(rs.getBoolean(2)); // Hello World
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1));
+ assertFalse(rs.getBoolean(2)); // hello world
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1));
+ assertTrue(rs.getBoolean(2)); // Report123
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1));
+ assertFalse(rs.getBoolean(2)); // Test456
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.getBoolean(2)); // line1\nline2
+ assertTrue(rs.next());
+ assertEquals("id5", rs.getString(1));
+ // NULL val — REGEXP_LIKE returns false for null
+ assertFalse(rs.getBoolean(2));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testNullSourceReturnsNull() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // NULL source should not match; use 's' flag so '.' matches newline too
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, '.*', 's')
ORDER BY ID");
+ // id5 has NULL val, should not appear in results
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1));
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1));
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1));
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1));
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testFullMatchPattern() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // REGEXP_LIKE does full match (like Java's matches()), not partial
+ // 'Report' alone should NOT match 'Report123' because it's not a full
match
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'Report')");
+ assertFalse(rs.next());
+
+ // 'Report.*' should match 'Report123'
+ rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'Report.*')");
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testMultilineFlag() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Without 'm' flag, ^ only matches start of string
+ // 'line2' is on the second line of id4's value
+ // '^line2$' should NOT match without multiline
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'^line2$')");
+ assertFalse(rs.next());
+
+ // With 'm' flag, ^ and $ match line boundaries
+ // But REGEXP_LIKE is a full match, so we need '.*line2.*' with DOTALL
+ // or use multiline with a pattern that matches the whole string
+ rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'(?s).*^line2$.*', 'm')");
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testDotallFlag() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Without 's' flag, '.' does not match newline
+ // 'line1.line2' should NOT match because there's a \n between them
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'line1.line2')");
+ assertFalse(rs.next());
+
+ // With 's' flag (dotall), '.' matches newline
+ rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'line1.line2', 's')");
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testNotRegexpLike() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // NOT REGEXP_LIKE — negate the function
+ // '.*\\d+.*' with 's' flag matches any string containing digits
(including newlines)
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE NOT REGEXP_LIKE(VAL, '.*\\d+.*',
's') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1)); // Hello World
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1)); // hello world
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCaseSensitiveThenInsensitive_LastWins() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // 'ci' — 'i' comes last, so case-insensitive: matches both Hello and hello
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'hello.*',
'ci') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1)); // Hello World
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1)); // hello world
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCombinedCaseInsensitiveAndDotall() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // 'is' — case-insensitive + dotall: '.' matches newline
+ // Pattern 'LINE1.LINE2' with 'is' should match 'line1\nline2'
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, 'LINE1.LINE2',
'is') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testCombinedMultilineAndDotall() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // 'ms' — multiline + dotall combined
+ // With dotall, '.*' matches newlines; with multiline, ^ and $ match line
boundaries
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, '.*line2.*',
'ms') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id4", rs.getString(1));
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testInvalidFlagThrowsError() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ try {
+ conn.createStatement()
+ .executeQuery("SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL,
'Hello.*', 'x')");
+ assertFalse("Expected exception for invalid flag", true);
+ } catch (Exception e) {
+ // Expected: invalid match_parameter character 'x'
+ assertTrue(e.getMessage().contains("Invalid match_parameter character")
+ || e.getCause().getMessage().contains("Invalid match_parameter
character"));
+ }
+ conn.close();
+ }
+
+ @Test
+ public void testWithPreparedStatement() throws Exception {
+ Connection conn = DriverManager.getConnection(getUrl());
+ // Test REGEXP_LIKE with a parameterized pattern via PreparedStatement
+ String sql = "SELECT ID FROM " + tableName + " WHERE REGEXP_LIKE(VAL, ?)
ORDER BY ID";
+ PreparedStatement stmt = conn.prepareStatement(sql);
+ stmt.setString(1, "Hello.*");
+ ResultSet rs = stmt.executeQuery();
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1));
+ assertFalse(rs.next());
+
+ // Test with a different pattern
+ stmt.setString(1, ".*\\d+.*");
+ rs = stmt.executeQuery();
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1)); // Report123
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1)); // Test456
+ assertFalse(rs.next());
+
+ stmt.close();
+ conn.close();
+ }
+
+ // ---- Tests for dynamic (evaluate-time) pattern compilation ----
+ // These tests exercise the code path where this.pattern is null after init()
+ // because the pattern expression is not stateless (fails isStateless()
check).
+ // In these cases, the pattern is compiled per-row during evaluate().
+
+ /**
+  * Creates a uniquely named table with the columns needed by the dynamic-pattern tests
+  * (ID, VAL, PATTERN_COL, CATEGORY, PREFIX_COL), inserts six fixture rows (id0..id5) and
+  * commits them.
+  * @return the name of the newly created table
+  */
+ private String createDynamicPatternTable() throws Exception {
+   String dynTable = generateUniqueName();
+   String ddl = "CREATE TABLE " + dynTable + " (ID VARCHAR NOT NULL PRIMARY KEY, VAL VARCHAR,"
+     + " PATTERN_COL VARCHAR, CATEGORY VARCHAR, PREFIX_COL VARCHAR)";
+   String upsert = "UPSERT INTO " + dynTable
+     + " (ID, VAL, PATTERN_COL, CATEGORY, PREFIX_COL) VALUES (?, ?, ?, ?, ?)";
+   // try-with-resources: the connection and the prepared statement are released even if
+   // table creation or one of the upserts throws.
+   try (Connection conn = DriverManager.getConnection(getUrl())) {
+     // The table must exist before the UPSERT is prepared, so run the DDL first.
+     conn.createStatement().execute(ddl);
+     try (PreparedStatement stmt = conn.prepareStatement(upsert)) {
+       // id0: Hello World, pattern "Hello.*", greeting, prefix "Hello"
+       upsertDynRow(stmt, "id0", "Hello World", "Hello.*", "greeting", "Hello");
+       // id1: hello world, pattern "hello.*", greeting, prefix "hello"
+       upsertDynRow(stmt, "id1", "hello world", "hello.*", "greeting", "hello");
+       // id2: Report123, pattern ".*\\d+.*", code, prefix "Report"
+       upsertDynRow(stmt, "id2", "Report123", ".*\\d+.*", "code", "Report");
+       // id3: Test456, pattern "Test.*", code, prefix "Test"
+       upsertDynRow(stmt, "id3", "Test456", "Test.*", "code", "Test");
+       // id4: line1\nline2, pattern "NOMATCH", other, prefix "line1"
+       upsertDynRow(stmt, "id4", "line1\nline2", "NOMATCH", "other", "line1");
+       // id5: null val, null pattern, null category, null prefix
+       upsertDynRow(stmt, "id5", null, null, null, null);
+       conn.commit();
+     }
+   }
+   return dynTable;
+ }
+
+ /**
+  * Binds the five values to parameters 1..5 of {@code stmt}, in argument order, and
+  * executes the statement once. Any argument after {@code stmt} may be null.
+  */
+ private void upsertDynRow(PreparedStatement stmt, String id, String val, String patternCol,
+   String category, String prefixCol) throws SQLException {
+   String[] boundValues = { id, val, patternCol, category, prefixCol };
+   for (int slot = 0; slot < boundValues.length; slot++) {
+     // JDBC parameter indexes are 1-based.
+     stmt.setString(slot + 1, boundValues[slot]);
+   }
+   stmt.executeUpdate();
+ }
+
+ @Test
+ public void testDynamicPatternFromColumn() throws Exception {
+ // Category: not stateless — pattern is a column reference.
+ // PATTERN_COL is not a literal constant, so init() cannot pre-compile.
+ // Each row supplies its own regex via PATTERN_COL, compiled at evaluate
time.
+ String dynTable = createDynamicPatternTable();
+ Connection conn = DriverManager.getConnection(getUrl());
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + dynTable + " WHERE REGEXP_LIKE(VAL, PATTERN_COL)
ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1)); // "Hello World" matches "Hello.*"
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1)); // "hello world" matches "hello.*"
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1)); // "Report123" matches ".*\\d+.*"
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1)); // "Test456" matches "Test.*"
+ // id4: "line1\nline2" does NOT match "NOMATCH"
+ // id5: null val and null pattern — excluded
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testDynamicPatternFromCaseExpression() throws Exception {
+ // Category: not stateless — CASE expression depends on CATEGORY column.
+ // The CASE references a column, so the entire expression is not stateless.
+ // Pattern is determined per-row based on the CATEGORY value.
+ String dynTable = createDynamicPatternTable();
+ Connection conn = DriverManager.getConnection(getUrl());
+ ResultSet rs = conn.createStatement()
+ .executeQuery("SELECT ID FROM " + dynTable + " WHERE REGEXP_LIKE(VAL,"
+ + " CASE WHEN CATEGORY = 'greeting' THEN 'Hello.*'"
+ + " WHEN CATEGORY = 'code' THEN '.*\\d+.*'" + " ELSE
'NOMATCH' END) ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1)); // greeting → "Hello.*" matches
"Hello World"
+ // id1: greeting → "Hello.*" does NOT match "hello world" (case-sensitive)
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1)); // code → ".*\\d+.*" matches
"Report123"
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1)); // code → ".*\\d+.*" matches
"Test456"
+ // id4: other → "NOMATCH" does NOT match "line1\nline2"
+ // id5: null category → ELSE "NOMATCH", null val → excluded
+ assertFalse(rs.next());
+ conn.close();
+ }
+
+ @Test
+ public void testDynamicPatternFromColumnExpression() throws Exception {
+ // Category: not stateless — expression involves PREFIX_COL column.
+ // PREFIX_COL || '.*' is a concatenation that includes a column reference,
+ // so the expression is not stateless. Pattern is built per-row at
evaluate time.
+ String dynTable = createDynamicPatternTable();
+ Connection conn = DriverManager.getConnection(getUrl());
+ ResultSet rs = conn.createStatement().executeQuery(
+ "SELECT ID FROM " + dynTable + " WHERE REGEXP_LIKE(VAL, PREFIX_COL ||
'.*') ORDER BY ID");
+ assertTrue(rs.next());
+ assertEquals("id0", rs.getString(1)); // "Hello" || ".*" → "Hello.*"
matches "Hello World"
+ assertTrue(rs.next());
+ assertEquals("id1", rs.getString(1)); // "hello" || ".*" → "hello.*"
matches "hello world"
+ assertTrue(rs.next());
+ assertEquals("id2", rs.getString(1)); // "Report" || ".*" → "Report.*"
matches "Report123"
+ assertTrue(rs.next());
+ assertEquals("id3", rs.getString(1)); // "Test" || ".*" → "Test.*" matches
"Test456"
+ // id4: "line1" || ".*" → "line1.*" does NOT match "line1\nline2" (dot
doesn't match \n)
+ // id5: null prefix → null pattern → excluded
+ assertFalse(rs.next());
+ conn.close();
+ }
+}
diff --git
a/phoenix-core/src/test/java/org/apache/phoenix/expression/function/RegexpLikeFunctionTest.java
b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/RegexpLikeFunctionTest.java
new file mode 100644
index 0000000000..b33d9a3920
--- /dev/null
+++
b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/RegexpLikeFunctionTest.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.lang.reflect.Field;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+import org.apache.phoenix.expression.Determinism;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.LiteralExpression;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link RegexpLikeFunction}.
+ */
+public class RegexpLikeFunctionTest {
+
+ @Test
+ public void testNullMatchParameter() {
+ assertEquals(0, RegexpLikeFunction.parseMatchParameter(null));
+ }
+
+ @Test
+ public void testEmptyMatchParameter() {
+ assertEquals(0, RegexpLikeFunction.parseMatchParameter(""));
+ }
+
+ @Test
+ public void testCaseInsensitiveFlag() {
+ int flags = RegexpLikeFunction.parseMatchParameter("i");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ }
+
+ @Test
+ public void testCaseSensitiveFlag() {
+ // 'c' alone should result in no CASE_INSENSITIVE flag (default behavior)
+ int flags = RegexpLikeFunction.parseMatchParameter("c");
+ assertEquals(0, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(0, flags); // no flags set at all
+ }
+
+ @Test
+ public void testMultilineFlag() {
+ int flags = RegexpLikeFunction.parseMatchParameter("m");
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ }
+
+ @Test
+ public void testDotallFlag() {
+ int flags = RegexpLikeFunction.parseMatchParameter("s");
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test
+ public void testCaseInsensitiveThenCaseSensitive_LastWins() {
+ // 'ic' — 'c' comes last, so case-sensitive (no CASE_INSENSITIVE flag)
+ int flags = RegexpLikeFunction.parseMatchParameter("ic");
+ assertEquals(0, flags & Pattern.CASE_INSENSITIVE);
+ }
+
+ @Test
+ public void testCaseSensitiveThenCaseInsensitive_LastWins() {
+ // 'ci' — 'i' comes last, so case-insensitive
+ int flags = RegexpLikeFunction.parseMatchParameter("ci");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ }
+
+ @Test
+ public void testCombinedCaseInsensitiveAndDotall() {
+ // 'is' — both case-insensitive and dotall
+ int flags = RegexpLikeFunction.parseMatchParameter("is");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test
+ public void testCombinedMultilineAndDotall() {
+ // 'ms' — both multiline and dotall
+ int flags = RegexpLikeFunction.parseMatchParameter("ms");
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test
+ public void testAllFlagsCombined() {
+ // 'ims' — case-insensitive, multiline, and dotall
+ int flags = RegexpLikeFunction.parseMatchParameter("ims");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test
+ public void testAllFlagsWithCaseSensitiveOverride() {
+ // 'imsc' — 'c' at end overrides 'i', so no CASE_INSENSITIVE, but
multiline and dotall remain
+ int flags = RegexpLikeFunction.parseMatchParameter("imsc");
+ assertEquals(0, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test
+ public void testDuplicateFlags() {
+ // 'iimm' — duplicate flags should be idempotent
+ int flags = RegexpLikeFunction.parseMatchParameter("iimm");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ }
+
+ @Test
+ public void testComplexOverrideSequence() {
+ // 'icims' — i, then c (clears i), then i (sets again), then m, then s
+ int flags = RegexpLikeFunction.parseMatchParameter("icims");
+ assertEquals(Pattern.CASE_INSENSITIVE, flags & Pattern.CASE_INSENSITIVE);
+ assertEquals(Pattern.MULTILINE, flags & Pattern.MULTILINE);
+ assertEquals(Pattern.DOTALL, flags & Pattern.DOTALL);
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testInvalidFlagThrowsException() {
+ RegexpLikeFunction.parseMatchParameter("x");
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testInvalidFlagInCombination() {
+ // 'iz' — 'z' is invalid, should throw even though 'i' is valid
+ RegexpLikeFunction.parseMatchParameter("iz");
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testNumericFlagThrowsException() {
+ RegexpLikeFunction.parseMatchParameter("1");
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testUpperCaseFlagThrowsException() {
+ // 'I' (uppercase) is not valid — only lowercase 'i' is
+ RegexpLikeFunction.parseMatchParameter("I");
+ }
+
+ // ---- Tests for pattern initialization based on isStateless() ----
+
+ /**
+ * Helper method to access the private 'pattern' field via reflection.
+ */
+ private Object getPatternField(RegexpLikeFunction function) throws Exception
{
+ Field patternField = RegexpLikeFunction.class.getDeclaredField("pattern");
+ patternField.setAccessible(true);
+ return patternField.get(function);
+ }
+
+ @Test
+ public void testPatternInitializedWhenStateless() throws Exception {
+ // When pattern is a constant literal (stateless), it should be compiled
during init()
+ Expression sourceExpr = LiteralExpression.newConstant("test string",
PVarchar.INSTANCE);
+ Expression patternExpr = LiteralExpression.newConstant("test.*",
PVarchar.INSTANCE);
+ Expression matchParamExpr = LiteralExpression.newConstant(null,
PVarchar.INSTANCE);
+
+ // Verify the pattern expression is stateless
+ assertTrue("Pattern expression should be stateless",
patternExpr.isStateless());
+
+ // Create the function with stateless expressions
+ StringBasedRegexpLikeFunction function =
+ new StringBasedRegexpLikeFunction(Arrays.asList(sourceExpr, patternExpr,
matchParamExpr));
+
+ // Verify that the pattern was initialized (not null) because the
expression is stateless
+ Object pattern = getPatternField(function);
+ assertNotNull("Pattern should be initialized when pattern expression is
stateless", pattern);
+ }
+
+ @Test
+ public void testPatternNotInitializedWhenNotStateless() throws Exception {
+ // When pattern comes from a column (not stateless), it should NOT be
compiled during init()
+ // Instead, it will be compiled per-row during evaluate()
+ Expression sourceExpr = LiteralExpression.newConstant("test string",
PVarchar.INSTANCE);
+
+ // Mock a non-stateless pattern expression (e.g., column reference)
+ Expression patternExpr = mock(Expression.class);
+ when(patternExpr.isStateless()).thenReturn(false);
+ when(patternExpr.getDeterminism()).thenReturn(Determinism.PER_ROW);
+ when(patternExpr.getDataType()).thenReturn(PVarchar.INSTANCE);
+
+ Expression matchParamExpr = LiteralExpression.newConstant(null,
PVarchar.INSTANCE);
+
+ // Verify the pattern expression is NOT stateless
+ assertFalse("Pattern expression should not be stateless",
patternExpr.isStateless());
+
+ // Create the function with non-stateless pattern expression
+ StringBasedRegexpLikeFunction function =
+ new StringBasedRegexpLikeFunction(Arrays.asList(sourceExpr, patternExpr,
matchParamExpr));
+
+ // Verify that the pattern was NOT initialized (is null) because the
expression is not stateless
+ Object pattern = getPatternField(function);
+ assertNull("Pattern should not be initialized when pattern expression is
not stateless",
+ pattern);
+ }
+
+ @Test
+ public void testPatternInitializedWithFlags() throws Exception {
+ // When both pattern and match_parameter are constants, pattern should be
compiled with flags
+ Expression sourceExpr = LiteralExpression.newConstant("test string",
PVarchar.INSTANCE);
+ Expression patternExpr = LiteralExpression.newConstant("test.*",
PVarchar.INSTANCE);
+ Expression matchParamExpr = LiteralExpression.newConstant("i",
PVarchar.INSTANCE);
+
+ // Verify all expressions are stateless
+ assertTrue("Pattern expression should be stateless",
patternExpr.isStateless());
+ assertTrue("Match parameter expression should be stateless",
matchParamExpr.isStateless());
+
+ // Create the function
+ StringBasedRegexpLikeFunction function =
+ new StringBasedRegexpLikeFunction(Arrays.asList(sourceExpr, patternExpr,
matchParamExpr));
+
+ // Verify that the pattern was initialized with flags
+ Object pattern = getPatternField(function);
+ assertNotNull("Pattern should be initialized when all expressions are
stateless", pattern);
+ }
+}