This is an automated email from the ASF dual-hosted git repository.

dockerzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new cc06854292 [INLONG-11060][SDK] Transform support REGEXP_...() related 
functions (#11102)
cc06854292 is described below

commit cc06854292e6fc1318135711f669f363018d9f91
Author: emptyOVO <118812562+empty...@users.noreply.github.com>
AuthorDate: Fri Sep 20 17:29:30 2024 +0800

    [INLONG-11060][SDK] Transform support REGEXP_...() related functions 
(#11102)
---
 .../process/function/RegexpCountFunction.java      | 71 ++++++++++++++++
 .../process/function/RegexpExtractAllFunction.java | 94 ++++++++++++++++++++++
 .../process/function/RegexpExtractFunction.java    | 84 +++++++++++++++++++
 .../transform/process/function/RegexpFunction.java | 65 +++++++++++++++
 .../process/function/RegexpInstrFunction.java      | 71 ++++++++++++++++
 .../process/function/RegexpReplaceFunction.java    | 67 +++++++++++++++
 .../process/function/RegexpSubstrFunction.java     | 70 ++++++++++++++++
 .../function/string/TestRegexExtractFunction.java  | 91 +++++++++++++++++++++
 .../function/string/TestRegexpCountFunction.java   | 77 ++++++++++++++++++
 .../string/TestRegexpExtractAllFunction.java       | 92 +++++++++++++++++++++
 .../function/string/TestRegexpFunction.java        | 77 ++++++++++++++++++
 .../function/string/TestRegexpInstrFunction.java   | 84 +++++++++++++++++++
 .../function/string/TestRegexpReplaceFunction.java | 65 +++++++++++++++
 .../function/string/TestRegexpSubstrFunction.java  | 84 +++++++++++++++++++
 14 files changed, 1092 insertions(+)

diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
new file mode 100644
index 0000000000..bc7091a0ce
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpCountFunction
+ * description: REGEXP_COUNT(str, regexp)--Returns the number of times str matches the regexp pattern.
+ *              regexp must be a Java regular expression.
+ *              Returns an INTEGER representation of the number of matches.
+ *              NULL if any of the arguments are NULL or regexp is invalid.
+ */
+@TransformFunction(names = {"regexp_count"})
+public class RegexpCountFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds the parsers for the first two call arguments (str, regexp).
+     * Both parsers stay null when fewer than two arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpCountFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Counts the successive matches of the pattern found in the input string.
+     *
+     * @return the number of matches as an Integer, or null when an argument is missing or null
+     *         or when the pattern is not a valid Java regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException, so answer NULL up front.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Pattern pattern;
+        try {
+            pattern = Pattern.compile(patternString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // An invalid regexp yields NULL, as stated in the function description.
+            return null;
+        }
+        Matcher matcher = pattern.matcher(inputString);
+        int count = 0;
+        while (matcher.find()) {
+            count++;
+        }
+        return count;
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
new file mode 100644
index 0000000000..5c825915d3
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpExtractAllFunction
+ * description: REGEXP_EXTRACT_ALL(str, regexp[, extractIndex])--Returns an ARRAY representation of all the
+ *              matched substrings. NULL if any of the arguments are NULL or invalid.
+ *              Extracts all the substrings in str that match the regexp expression and correspond to the
+ *              regexp group extractIndex. regexp may contain multiple groups. extractIndex indicates which
+ *              regexp group to extract; groups are numbered from 1 and 0 means the entire match.
+ *              When extractIndex is omitted this implementation uses 0 (the entire match).
+ *              NOTE(review): the description previously stated that the default is 1 -- confirm the intended default.
+ *              NULL is also returned when extractIndex is negative or exceeds the number of groups in regexp,
+ *              or when nothing matches.
+ * for example: REGEXP_EXTRACT_ALL("abc123def456ghi789", "(\\d+)", 0)--return [123, 456, 789]
+ *              REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 1)--return [John]
+ *              REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 0)--return [Name: John, Age: 25, Location: NY]
+ */
+@TransformFunction(names = {"regexp_extract_all"})
+public class RegexpExtractAllFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser indexIntegerParser;
+
+    /**
+     * Builds the parsers for (str, regexp) and, when present, the third argument extractIndex.
+     * The string parsers stay null when fewer than two arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpExtractAllFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+                if (expressions.size() >= 3) {
+                    indexIntegerParser = OperatorTools.buildParser(expressions.get(2));
+                }
+            }
+        }
+    }
+
+    /**
+     * Collects the requested group of every match of the pattern in the input string.
+     *
+     * @return a List of the extracted group values in match order, or null when an argument is missing or null,
+     *         the pattern is invalid, the index is negative or larger than the pattern's group count,
+     *         or the input contains no match
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException, so answer NULL up front.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        int index = 0;
+        if (indexIntegerParser != null) {
+            Object indexValue = indexIntegerParser.parse(sourceData, rowIndex, context);
+            index = OperatorTools.parseBigDecimal(indexValue).intValue();
+        }
+        if (index < 0) {
+            return null;
+        }
+        Pattern pattern;
+        try {
+            pattern = Pattern.compile(patternString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // An invalid regexp yields NULL, as stated in the function description.
+            return null;
+        }
+        Matcher matcher = pattern.matcher(inputString);
+        // The group count is a property of the pattern, so it is checked once instead of on every match.
+        if (index > matcher.groupCount()) {
+            return null;
+        }
+        List<String> resultList = new ArrayList<>();
+        while (matcher.find()) {
+            resultList.add(matcher.group(index));
+        }
+
+        return resultList.isEmpty() ? null : resultList;
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
new file mode 100644
index 0000000000..20a2d28925
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpExtractFunction
+ * description: REGEXP_EXTRACT(string1, string2, integer)--Returns a string from string1 which is extracted with
+ *              the regular expression string2 and the regexp match group index integer. The regexp match group
+ *              index starts from 1, and 0 means matching the whole regexp. The regexp match group index must
+ *              not exceed the number of the defined groups, otherwise NULL is returned.
+ *              NULL is also returned when fewer than three arguments are given, when string1 or string2 is NULL,
+ *              when the index is negative, or when string1 contains no match.
+ * for example: REGEXP_EXTRACT("abc123def", "(\\d+)", 1)--return 123
+ *              REGEXP_EXTRACT("Name: John, Age: 25, Location: NY",
+ *              "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2)--return 25
+ *              REGEXP_EXTRACT("abc123def", "(\\d+)", 2)--return null
+ *              REGEXP_EXTRACT("abc123def", "abcdef", 1)--return null
+ */
+@TransformFunction(names = {"regexp_extract"})
+public class RegexpExtractFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser indexIntegerParser;
+
+    /**
+     * Builds the parsers for (string1, string2, integer).
+     * All parsers stay null when fewer than three arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpExtractFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 3) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+                indexIntegerParser = OperatorTools.buildParser(expressions.get(2));
+            }
+        }
+    }
+
+    /**
+     * Extracts the requested group from the first match of the pattern in the input string.
+     *
+     * @return the group text of the first match, or null when an argument is missing or null, the index is
+     *         negative or larger than the pattern's group count, or the input contains no match
+     * @throws java.util.regex.PatternSyntaxException if the pattern is not a valid Java regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null || indexIntegerParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException, so answer NULL up front.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Object indexValue = indexIntegerParser.parse(sourceData, rowIndex, context);
+        int indexInteger = OperatorTools.parseBigDecimal(indexValue).intValue();
+        if (indexInteger < 0) {
+            return null;
+        }
+        Matcher matcher = Pattern.compile(patternString).matcher(inputString);
+        // Only the first match is considered; an index beyond the defined groups yields NULL.
+        if (matcher.find() && indexInteger <= matcher.groupCount()) {
+            return matcher.group(indexInteger);
+        }
+        return null;
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
new file mode 100644
index 0000000000..4e43b5bc9e
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+/**
+ * RegexpFunction
+ * description: REGEX(string1, string2)--Returns TRUE if any (possibly empty) substring of string1 matches the
+ *              Java regular expression string2, otherwise FALSE. Returns NULL if any of the arguments is NULL.
+ *              SIMILAR(string1, string2)--Same as above
+ */
+@TransformFunction(names = {"regex", "similar"})
+public class RegexpFunction implements ValueParser {
+
+    private ValueParser inputParser;
+
+    private ValueParser patternParser;
+
+    /**
+     * Builds the parsers for (string1, string2).
+     * Both parsers stay null unless exactly two arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() == 2) {
+                inputParser = OperatorTools.buildParser(expressions.get(0));
+                patternParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Tests whether the pattern matches anywhere in the input string.
+     *
+     * @return Boolean.TRUE or Boolean.FALSE, or null when an argument is missing or null
+     * @throws java.util.regex.PatternSyntaxException if the pattern is not a valid Java regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputParser == null || patternParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException; the description
+        // promises NULL for a NULL argument instead.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Matcher matcher = Pattern.compile(patternString).matcher(inputString);
+        // find() looks for a match of any substring, not of the whole input.
+        return matcher.find();
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
new file mode 100644
index 0000000000..9cd128e532
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpInstrFunction
+ * description: REGEXP_INSTR(str, regexp)--Returns the position of the first substring in str that matches regexp.
+ *              Result indexes begin at 1, 0 if there is no match.
+ *              Returns an INTEGER representation of the first matched substring index.
+ *              NULL if any of the arguments are NULL or regexp is invalid.
+ */
+@TransformFunction(names = {"regexp_instr"})
+public class RegexpInstrFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds the parsers for the first two call arguments (str, regexp).
+     * Both parsers stay null when fewer than two arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpInstrFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Locates the first match of the pattern in the input string.
+     *
+     * @return the 1-based start position of the first match, 0 when nothing matches, or null when an argument
+     *         is missing or null or when the pattern is not a valid Java regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException, so answer NULL up front.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Pattern pattern;
+        try {
+            pattern = Pattern.compile(patternString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // An invalid regexp yields NULL, as stated in the function description.
+            return null;
+        }
+        Matcher matcher = pattern.matcher(inputString);
+        // Matcher.start() is 0-based while the SQL result is 1-based.
+        return matcher.find() ? matcher.start() + 1 : 0;
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
new file mode 100644
index 0000000000..834739b801
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpReplaceFunction
+ * description: REGEXP_REPLACE(string1, string2, string3)--Returns a string from string1 with all the substrings
+ *              that match the regular expression string2 replaced with string3.
+ *              Returns NULL if fewer than three arguments are given or if any of the arguments is NULL.
+ */
+@TransformFunction(names = {"regexp_replace"})
+public class RegexpReplaceFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    private ValueParser replaceStringParser;
+
+    /**
+     * Builds the parsers for (string1, string2, string3).
+     * All parsers stay null when fewer than three arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpReplaceFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 3) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+                replaceStringParser = OperatorTools.buildParser(expressions.get(2));
+            }
+        }
+    }
+
+    /**
+     * Replaces every match of the pattern in the input string with the replacement string.
+     * The replacement is passed to Matcher.replaceAll, so group references such as $1 are interpreted.
+     *
+     * @return the string after replacement, or null when an argument is missing or null
+     * @throws java.util.regex.PatternSyntaxException if the pattern is not a valid Java regular expression
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null || replaceStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        String replaceString = OperatorTools.parseString(replaceStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile, Pattern.matcher and Matcher.replaceAll all throw NullPointerException on null.
+        if (inputString == null || patternString == null || replaceString == null) {
+            return null;
+        }
+        Pattern pattern = Pattern.compile(patternString);
+        return pattern.matcher(inputString).replaceAll(replaceString);
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
new file mode 100644
index 0000000000..9e2a46af36
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * RegexpSubstrFunction
+ * description: REGEXP_SUBSTR(str, regexp)--Returns the first substring in str that matches regexp.
+ *              Returns a STRING representation of the first matched substring. NULL if any of the arguments
+ *              are NULL, or regexp is invalid, or the pattern is not found.
+ *              REGEX_SUBSTR(str, regexp)--Same as above
+ */
+@TransformFunction(names = {"regexp_substr", "regex_substr"})
+public class RegexpSubstrFunction implements ValueParser {
+
+    private ValueParser inputStringParser;
+
+    private ValueParser patternStringParser;
+
+    /**
+     * Builds the parsers for the first two call arguments (str, regexp).
+     * Both parsers stay null when fewer than two arguments are supplied, in which case parse() returns null.
+     */
+    public RegexpSubstrFunction(Function expr) {
+        if (expr.getParameters() != null) {
+            List<Expression> expressions = expr.getParameters().getExpressions();
+            if (expressions != null && expressions.size() >= 2) {
+                inputStringParser = OperatorTools.buildParser(expressions.get(0));
+                patternStringParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Returns the text of the first match of the pattern in the input string.
+     *
+     * @return the first matched substring, or null when an argument is missing or null, the pattern is not
+     *         a valid Java regular expression, or nothing matches
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (inputStringParser == null || patternStringParser == null) {
+            return null;
+        }
+        String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context));
+        String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context));
+        // Pattern.compile(null) and Pattern.matcher(null) throw NullPointerException, so answer NULL up front.
+        if (inputString == null || patternString == null) {
+            return null;
+        }
+        Pattern pattern;
+        try {
+            pattern = Pattern.compile(patternString);
+        } catch (java.util.regex.PatternSyntaxException ignored) {
+            // An invalid regexp yields NULL, as stated in the function description.
+            return null;
+        }
+        Matcher matcher = pattern.matcher(inputString);
+        // group(0) is the entire text of the match.
+        return matcher.find() ? matcher.group(0) : null;
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
new file mode 100644
index 0000000000..4749cd3959
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for the regexp_extract transform function: successful group extraction with three arguments,
+ * and the empty result produced when the call supplies fewer than three arguments.
+ */
+public class TestRegexExtractFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testRegexpExtractFunction() throws Exception {
+        String transformSql1 = "select regexp_extract(string1,string2,numeric1) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regexp_extract("abc123def", "(\\d+)", 1)
+        List<String> output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=123", output1.get(0));
+        // case2: regexp_extract("abc123def124", "(\\d+)", 0)
+        List<String> output2 = processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=123", output2.get(0));
+        // case3: regexp_extract("Name: John, Age: 25, Location: NY",
+        // "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2)
+        List<String> output3 = processor1.transform(
+                "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|1|2|3",
+                new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=25", output3.get(0));
+        // case4: regexp_extract("Email: john.doe@example.com",
+        // "([a-zA-Z]+)\\.([a-zA-Z]+)@([a-zA-Z]+)\\.([a-zA-Z]+)", 3)
+        List<String> output4 = processor1.transform(
+                "Email: john.doe@example.com|([a-zA-Z]+)\\\\.([a-zA-Z]+)@([a-zA-Z]+)\\\\.([a-zA-Z]+)|1|3|2",
+                new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=example", output4.get(0));
+
+        String transformSql2 = "select regexp_extract(string1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case5: regexp_extract("The quick brown fox quick") -- too few arguments, so the result is empty
+        List<String> output5 =
+                processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=", output5.get(0));
+
+        String transformSql3 = "select regexp_extract(string1,string2) from source";
+        TransformConfig config3 = new TransformConfig(transformSql3);
+        TransformProcessor<String, String> processor3 = TransformProcessor
+                .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case6: regexp_extract("The quick brown fox quick", "[q-") -- still too few arguments, so the
+        // function answers null before the invalid pattern is ever compiled
+        List<String> output6 =
+                processor3.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=", output6.get(0));
+        // Sanity check that the pattern used in case6 really is rejected by java.util.regex.
+        PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> {
+            Pattern.compile("[q-");
+        });
+        assertTrue(exception.getMessage().contains("Illegal character range near index 3"));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
new file mode 100644
index 0000000000..b0c83c86c9
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * End-to-end tests for the {@code regexp_count} transform function, driven through a
+ * {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first two
+ * fields are bound to {@code string1} and {@code string2}. {@code csvSource} and {@code kvSink}
+ * are not declared in this class (presumably inherited from {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpCountFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers a matching pattern, a non-matching pattern, a call with a missing argument and a
+     * syntactically invalid pattern.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexpCountFunction() throws Exception {
+        String transformSql1 = "select regexp_count(string1,string2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regexp_count("The quick brown fox quick", "quick")
+        List<String> output1 = processor1.transform("The quick brown fox quick|quick|slow|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=2", output1.get(0));
+        // case2: regexp_count("The quick brown fox quick", "slow")
+        List<String> output2 = processor1.transform("The quick brown fox quick|slow|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=0", output2.get(0));
+
+        String transformSql2 = "select regexp_count(string1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: regexp_count("The quick brown fox quick") - pattern argument missing, empty result expected
+        List<String> output3 =
+                processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=", output3.get(0));
+
+        // case4: regexp_count("The quick brown fox quick", "[q-")
+        // The SQL is the same as transformSql1, so processor1 is reused instead of building a duplicate.
+        List<String> output4 =
+                processor1.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        // NOTE(review): the check below exercises java.util.regex.Pattern directly; it does not assert
+        // what regexp_count itself emits for an invalid pattern. Consider asserting on output4.get(0)
+        // once the intended result is confirmed.
+        PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> {
+            Pattern.compile("[q-");
+        });
+        assertTrue(exception.getMessage().contains("Illegal character range near index 3"));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
new file mode 100644
index 0000000000..1774a52d09
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * End-to-end tests for the {@code regexp_extract_all} transform function, driven through a
+ * {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first two
+ * fields are bound to {@code string1} and {@code string2} and the fourth field to {@code numeric1}
+ * (the capture-group index). {@code csvSource} and {@code kvSink} are not declared in this class
+ * (presumably inherited from {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpExtractAllFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers extraction by group index, a group index beyond the available groups, the
+     * two-argument form and a syntactically invalid pattern.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexpExtractAllFunction() throws Exception {
+        String transformSql1 = "select regexp_extract_all(string1,string2,numeric1) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regexp_extract_all("abc123def456ghi789", "(\\d+)", 1)
+        List<String> output1 = processor1.transform("abc123def456ghi789|(\\\\d+)|2|1|3|4", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=[123, 456, 789]", output1.get(0));
+        // case2: regexp_extract_all("abc123def124", "(\\d+)", 0)
+        List<String> output2 = processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=[123, 124]", output2.get(0));
+        // case3: regexp_extract_all("Name: John, Age: 25, Location: NY",
+        // "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 1)
+        List<String> output3 = processor1.transform(
+                "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|2|1|3",
+                new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=[John]", output3.get(0));
+        // case4: regexp_extract_all("Name: John, Age: 25, Location: NY",
+        // "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 4) - group 4 does not exist, empty result expected
+        List<String> output4 = processor1.transform(
+                "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|1|4|3",
+                new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=", output4.get(0));
+
+        String transformSql2 = "select regexp_extract_all(string1,string2) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case5: regexp_extract_all("The quick brown fox quick", "quick")
+        List<String> output5 =
+                processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=[quick, quick]", output5.get(0));
+
+        // case6: regexp_extract_all("The quick brown fox quick", "[q-")
+        // The SQL is the same as transformSql2, so processor2 is reused instead of building a duplicate.
+        List<String> output6 =
+                processor2.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        // NOTE(review): the check below exercises java.util.regex.Pattern directly; it does not assert
+        // what regexp_extract_all itself emits for an invalid pattern. Consider asserting on
+        // output6.get(0) once the intended result is confirmed.
+        PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> {
+            Pattern.compile("[q-");
+        });
+        assertTrue(exception.getMessage().contains("Illegal character range near index 3"));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
new file mode 100644
index 0000000000..c050689772
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * End-to-end tests for the {@code regex} and {@code similar} transform functions, driven through
+ * a {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first two
+ * fields are bound to {@code string1} and {@code string2}. {@code csvSource} and {@code kvSink}
+ * are not declared in this class (presumably inherited from {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers a matching pattern, a non-matching pattern and a call with a missing argument for
+     * {@code regex}, then repeats the match / no-match cases for {@code similar}.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexFunction() throws Exception {
+        String transformSql1 = "select regex(string1, string2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regex("The quick brown fox", "quick")
+        List<String> output1 = processor1.transform("The quick brown fox|quick|5|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=true", output1.get(0));
+
+        // case2: regex("The quick brown fox", "cold")
+        List<String> output2 = processor1.transform("The quick brown fox|cold|5|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=false", output2.get(0));
+
+        String transformSql2 = "select regex(string1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: regex("User: Alice, ID: 12345") - pattern argument missing, empty result expected
+        List<String> output3 =
+                processor2.transform("User: Alice, ID: 12345|User: (\\\\w+), ID: (\\\\d+)|5|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=", output3.get(0));
+
+        String transformSql3 = "select similar(string1, string2) from source";
+        TransformConfig config3 = new TransformConfig(transformSql3);
+        TransformProcessor<String, String> processor3 = TransformProcessor
+                .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case4: similar("The quick brown fox", "quick")
+        List<String> output4 = processor3.transform("The quick brown fox|quick|5|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=true", output4.get(0));
+
+        // case5: similar("The quick brown fox", "cold")
+        List<String> output5 = processor3.transform("The quick brown fox|cold|5|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=false", output5.get(0));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
new file mode 100644
index 0000000000..b65a2e9801
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * End-to-end tests for the {@code regexp_instr} transform function, driven through a
+ * {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first two
+ * fields are bound to {@code string1} and {@code string2}. {@code csvSource} and {@code kvSink}
+ * are not declared in this class (presumably inherited from {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpInstrFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers matching patterns (the expected values are 1-based positions of the first match),
+     * a non-matching pattern (expected 0), a call with a missing argument and a syntactically
+     * invalid pattern.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexpInstrFunction() throws Exception {
+        String transformSql1 = "select regexp_instr(string1,string2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regexp_instr("abc123def", "(\\d+)")
+        List<String> output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=4", output1.get(0));
+        // case2: regexp_instr("hello world!", "world")
+        List<String> output2 = processor1.transform("hello world!|world|1|0|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=7", output2.get(0));
+        // case3: regexp_instr("abcdef", "\\d+")
+        List<String> output3 = processor1.transform(
+                "abcdef|\\\\d+|1|2|3",
+                new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=0", output3.get(0));
+
+        String transformSql2 = "select regexp_instr(string1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case4: regexp_instr("The quick brown fox quick") - pattern argument missing, empty result expected
+        List<String> output4 =
+                processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=", output4.get(0));
+
+        // case5: regexp_instr("abc123def", "[q-")
+        // The SQL is the same as transformSql1, so processor1 is reused instead of building a duplicate.
+        List<String> output5 =
+                processor1.transform("abc123def|[q-|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        // NOTE(review): the check below exercises java.util.regex.Pattern directly; it does not assert
+        // what regexp_instr itself emits for an invalid pattern. Consider asserting on output5.get(0)
+        // once the intended result is confirmed.
+        PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> {
+            Pattern.compile("[q-");
+        });
+        assertTrue(exception.getMessage().contains("Illegal character range near index 3"));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
new file mode 100644
index 0000000000..08cd9fce93
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * End-to-end tests for the {@code regexp_replace} transform function, driven through a
+ * {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first three
+ * fields are bound to {@code string1}, {@code string2} and {@code string3}. {@code csvSource} and
+ * {@code kvSink} are not declared in this class (presumably inherited from
+ * {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpReplaceFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers a literal pattern, a character-class pattern and a call with a missing replacement
+     * argument.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexpReplaceFunction() throws Exception {
+        String transformSql1 = "select regexp_replace(string1,string2,string3) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regexp_replace("The quick brown fox quick", "quick", "slow")
+        List<String> output1 = processor1.transform("The quick brown fox quick|quick|slow|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=The slow brown fox slow", output1.get(0));
+        // case2: regexp_replace("User: Alice, ID: 12345", "\\d+", "QAQ")
+        // The SQL is the same as transformSql1, so processor1 is reused instead of building a duplicate.
+        List<String> output2 =
+                processor1.transform("User: Alice, ID: 12345|\\\\d+|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=User: Alice, ID: QAQ", output2.get(0));
+
+        String transformSql2 = "select regexp_replace(string1,string2) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case3: regexp_replace("User: Alice, ID: 12345", "\\d+") - replacement missing, empty result expected
+        List<String> output3 =
+                processor2.transform("User: Alice, ID: 12345|\\\\d+|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=", output3.get(0));
+    }
+}
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
new file mode 100644
index 0000000000..eb5cea2176
--- /dev/null
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * End-to-end tests for the {@code regex_substr} transform function, driven through a
+ * {@link TransformProcessor} built from a SQL statement.
+ *
+ * <p>Each input record is a '|'-separated row; as the case comments below show, the first two
+ * fields are bound to {@code string1} and {@code string2}. {@code csvSource} and {@code kvSink}
+ * are not declared in this class (presumably inherited from {@code AbstractFunctionStringTestBase}).
+ */
+public class TestRegexpSubstrFunction extends AbstractFunctionStringTestBase {
+
+    /**
+     * Covers matching patterns (the first match is expected), a non-matching pattern, a call
+     * with a missing argument and a syntactically invalid pattern.
+     *
+     * @throws Exception if the processor cannot be created or a record cannot be transformed
+     */
+    @Test
+    public void testRegexpSubstrFunction() throws Exception {
+        String transformSql1 = "select regex_substr(string1,string2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case1: regex_substr("abc123def", "(\\d+)")
+        List<String> output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=123", output1.get(0));
+        // case2: regex_substr("hello world!", "\\w+")
+        List<String> output2 = processor1.transform("hello world!|\\\\w+|1|0|3", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=hello", output2.get(0));
+        // case3: regex_substr("abcdef", "\\d+") - no match, empty result expected
+        List<String> output3 = processor1.transform(
+                "abcdef|\\\\d+|1|2|3",
+                new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=", output3.get(0));
+
+        String transformSql2 = "select regex_substr(string1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case4: regex_substr("The quick brown fox quick") - pattern argument missing, empty result expected
+        List<String> output4 =
+                processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=", output4.get(0));
+
+        // case5: regex_substr("abc123def", "[q-")
+        // The SQL is the same as transformSql1, so processor1 is reused instead of building a duplicate.
+        List<String> output5 =
+                processor1.transform("abc123def|[q-|QAQ|2|1|3", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        // NOTE(review): the check below exercises java.util.regex.Pattern directly; it does not assert
+        // what regex_substr itself emits for an invalid pattern. Consider asserting on output5.get(0)
+        // once the intended result is confirmed.
+        PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> {
+            Pattern.compile("[q-");
+        });
+        assertTrue(exception.getMessage().contains("Illegal character range near index 3"));
+    }
+}

Reply via email to