This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e64736aa96 [fix](Nereids) string constant folding process regex delim 
by mistake (#48783)
5e64736aa96 is described below

commit 5e64736aa963c163247b3ac777091d4a11cf899a
Author: morrySnow <zhangwen...@selectdb.com>
AuthorDate: Fri Mar 7 16:36:30 2025 +0800

    [fix](Nereids) string constant folding process regex delim by mistake 
(#48783)
    
    ### What problem does this PR solve?
    
    Related PR: #40441
    
    Problem Summary:
    
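    The delimiters passed to split-related string functions must be
    matched literally during constant folding. Delimiters containing
    characters that have special meanings in regular expressions were
    being interpreted as regex syntax, so they must be escaped. The
    affected characters are:
    
    .$|()[{^?*+\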
---
 .../functions/executable/StringArithmetic.java     |  57 +++++-----
 .../fold_constant_string_arithmatic.groovy         | 123 ++++++++++++++++++++-
 2 files changed, 149 insertions(+), 31 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index bc056a03bcb..18ec333882c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -38,6 +38,9 @@ import 
org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
+import org.apache.doris.nereids.types.ArrayType;
+
+import com.google.common.collect.ImmutableList;
 
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
@@ -50,6 +53,7 @@ import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 
 /**
  * executable functions:
@@ -658,14 +662,18 @@ public class StringArithmetic {
     }
 
     /**
-     * Executable arithmetic functions split_by_char
+     * Executable arithmetic functions split_by_string
      */
-    @ExecFunction(name = "split_by_char")
-    public static Expression splitByChar(StringLikeLiteral first, 
StringLikeLiteral second) {
-        String[] result = first.getValue().split(second.getValue(), -1);
+    @ExecFunction(name = "split_by_string")
+    public static Expression splitByString(StringLikeLiteral first, 
StringLikeLiteral second) {
+        if (first.getValue().isEmpty()) {
+            return new ArrayLiteral(ImmutableList.of(), 
ArrayType.of(first.getDataType()));
+        }
+        int limit = second.getValue().isEmpty() ? 0 : -1;
+        String[] result = 
first.getValue().split(Pattern.quote(second.getValue()), limit);
         List<Literal> items = new ArrayList<>();
-        for (int i = 1; i < result.length; i++) {
-            items.add((Literal) castStringLikeLiteral(first, result[i]));
+        for (String s : result) {
+            items.add((Literal) castStringLikeLiteral(first, s));
         }
         return new ArrayLiteral(items);
     }
@@ -675,35 +683,34 @@ public class StringArithmetic {
      */
     @ExecFunction(name = "split_part")
     public static Expression splitPart(StringLikeLiteral first, 
StringLikeLiteral chr, IntegerLiteral number) {
+        if (number.getValue() == 0) {
+            return new NullLiteral(first.getDataType());
+        }
+        if (chr.getValue().isEmpty()) {
+            return castStringLikeLiteral(first, "");
+        }
+        if (first.getValue().isEmpty()) {
+            return new NullLiteral(first.getDataType());
+        }
         if (first.getValue().equals(chr.getValue())) {
             if (Math.abs(number.getValue()) == 1 || 
Math.abs(number.getValue()) == 2) {
                 return castStringLikeLiteral(first, "");
+            } else {
+                return new NullLiteral(first.getDataType());
             }
         }
         String separator = chr.getValue();
-        String[] parts = null;
+        String[] parts;
         if (number.getValue() < 0) {
             StringBuilder sb = new StringBuilder(first.getValue());
-            StringBuilder seperatorBuilder = new StringBuilder(separator);
-            separator = seperatorBuilder.reverse().toString();
-            if (".$|()[{^?*+\\".contains(separator) || 
separator.startsWith("\\")) {
-                separator = "\\" + separator;
-            }
-            parts = sb.reverse().toString().split(separator, -1);
+            StringBuilder separatorBuilder = new StringBuilder(separator);
+            separator = separatorBuilder.reverse().toString();
+            parts = sb.reverse().toString().split(Pattern.quote(separator), 
-1);
         } else {
-            if (".$|()[{^?*+\\".contains(separator) || 
separator.startsWith("\\")) {
-                separator = "\\" + separator;
-            }
-            parts = first.getValue().split(separator, -1);
+            parts = first.getValue().split(Pattern.quote(separator), -1);
         }
 
-        if (parts.length < Math.abs(number.getValue()) || number.getValue() == 
0) {
-            if (parts.length == Math.abs(number.getValue())) {
-                if (number.getValue() < 0 && 
first.getValue().startsWith(chr.getValue())
-                        || number.getValue() > 0 && 
first.getValue().endsWith(chr.getValue())) {
-                    return castStringLikeLiteral(first, "");
-                }
-            }
+        if (parts.length < Math.abs(number.getValue())) {
             return new NullLiteral(first.getDataType());
         } else if (number.getValue() < 0) {
             StringBuilder result = new 
StringBuilder(parts[Math.abs(number.getValue()) - 1]);
@@ -721,7 +728,7 @@ public class StringArithmetic {
         if (chr.getValue().isEmpty()) {
             return chr;
         }
-        String[] parts = first.getValue().split(chr.getValue(), -1);
+        String[] parts = first.getValue().split(Pattern.quote(chr.getValue()), 
-1);
         if (Math.abs(number.getValue()) >= parts.length) {
             return first;
         }
diff --git 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index e3ed0024b6f..0f2ddac1fa9 100644
--- 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++ 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -461,18 +461,80 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("select split_by_string(cast('abc' as string), cast('::' as 
string))")
     testFoldConst("select split_by_string('上海天津北京杭州', '北')")
     testFoldConst("select split_by_string('abccccc', 'c')")
+    testFoldConst("select split_by_string('abcde','')")
+    testFoldConst("select split_by_string('你a好b世c界','')")
+    testFoldConst("select split_by_string('12553','')")
+    testFoldConst("select split_by_string('','')")
+    testFoldConst("select split_by_string('',',')")
+    testFoldConst("select split_by_string('','a')")
+    testFoldConst("select split_by_string('','abc')")
+    testFoldConst("select split_by_string('abc','')")
+    testFoldConst("select split_by_string('a1b1c1d','1')")
+    testFoldConst("select split_by_string(',,,',',')")
+    testFoldConst("select split_by_string('a,b,c,abcde',',')")
+    testFoldConst("select split_by_string(',,a,b,c,',',')")
+    testFoldConst("select split_by_string('null',',')")
+    testFoldConst("select split_by_string('1,,2,3,,4,5,,abcde', ',,')")
+    testFoldConst("select split_by_string('abcde','')")
+    testFoldConst("select split_by_string('1,,2,3,,,,,,4,5, abcde', ',,')")
+    testFoldConst("select split_by_string(',,,,',',,')")
+    testFoldConst("select split_by_string('a,,b,,c',',,')")
+    testFoldConst("select split_by_string('a,,b,,c,,',',,')")
+    testFoldConst("select split_by_string(',,a,,b,,c,,',',,')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++')")
+    testFoldConst("SELECT 
split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\')")
     
     // split_part
-    testFoldConst("select split_part('a,b,c', ',', -1)")
-    testFoldConst("select split_part('abc##123###xyz', '##', 0)")
+    testFoldConst("select split_part('a,b,c', '', -2)")
+    testFoldConst("select split_part('a,b,c', '', -1)")
+    testFoldConst("select split_part('a,b,c', '', 0)")
+    testFoldConst("select split_part('a,b,c', '', 1)")
+    testFoldConst("select split_part('a,b,c', '', 2)")
+    testFoldConst("select split_part('', '', -2)")
+    testFoldConst("select split_part('', '', -1)")
+    testFoldConst("select split_part('', '', 0)")
+    testFoldConst("select split_part('', '', 1)")
+    testFoldConst("select split_part('', '', 2)")
+    testFoldConst("select split_part('', 'abc', -2)")
+    testFoldConst("select split_part('', 'abc', -1)")
+    testFoldConst("select split_part('', 'abc', 0)")
+    testFoldConst("select split_part('', 'abc', 1)")
+    testFoldConst("select split_part('', 'abc', 2)")
+    testFoldConst("select split_part('abc##123###xyz', '##', -10)")
+    testFoldConst("select split_part('abc##123###xyz', '##', -4)")
+    testFoldConst("select split_part('abc##123###xyz', '##', -3)")
+    testFoldConst("select split_part('abc##123###xyz', '##', -2)")
     testFoldConst("select split_part('abc##123###xyz', '##', -1)")
+    testFoldConst("select split_part('abc##123###xyz', '##', 0)")
     testFoldConst("select split_part('abc##123###xyz', '##', 1)")
-    testFoldConst("select split_part('abc##123###xyz', '##', -2)")
+    testFoldConst("select split_part('abc##123###xyz', '##', 2)")
     testFoldConst("select split_part('abc##123###xyz', '##', 3)")
-    testFoldConst("select split_part('abc##123###xyz', '##', -4)")
-    testFoldConst("select split_part('abc##123###xyz', '##', 5)")
+    testFoldConst("select split_part('abc##123###xyz', '##', 4)")
+    testFoldConst("select split_part('abc##123###xyz', '##', 10)")
+    testFoldConst("select split_part('a,b,c', ',', -100)")
+    testFoldConst("select split_part('a,b,c', ',', -5)")
+    testFoldConst("select split_part('a,b,c', ',', -4)")
+    testFoldConst("select split_part('a,b,c', ',', -3)")
+    testFoldConst("select split_part('a,b,c', ',', -2)")
+    testFoldConst("select split_part('a,b,c', ',', -1)")
+    testFoldConst("select split_part('a,b,c', ',', -0)")
+    testFoldConst("select split_part('a,b,c', ',', 0)")
+    testFoldConst("select split_part('a,b,c', ',', 1)")
     testFoldConst("select split_part('a,b,c', ',', 2)")
+    testFoldConst("select split_part('a,b,c', ',', 3)")
+    testFoldConst("select split_part('a,b,c', ',', 4)")
     testFoldConst("select split_part('a,b,c', ',', 5)")
+    testFoldConst("select split_part('a,b,c', ',', 100)")
     testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as 
string), -1)")
     testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as 
string), 2)")
     testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as 
string), 5)")
@@ -485,6 +547,7 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("select split_part('hello world', ' ', -2)")
     testFoldConst("select split_part('hello world', ' ', 2)")
     testFoldConst("select split_part('hello world', ' ', -3)")
+    testFoldConst("select split_part('hello world', ' ', -3)")
     testFoldConst("SELECT split_part('哈哈哈AAA','A', -5)")
     testFoldConst("SELECT split_part('哈哈哈AAA','A', -4)")
     testFoldConst("SELECT split_part('哈哈哈AAA','A', -3)")
@@ -505,7 +568,31 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("SELECT split_part('哈哈哈AA+','A', 2)")
     testFoldConst("SELECT split_part('哈哈哈AA+','A', 3)")
     testFoldConst("SELECT split_part('哈哈哈AA+','A', 4)")
-    
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
+    testFoldConst("SELECT 
split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")
+
     // starts_with
     testFoldConst("select starts_with('hello world','hello')")
     testFoldConst("select starts_with('hello world',null)")
@@ -650,6 +737,30 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("SELECT substring_index('哈哈哈AA+','A', 2)")
     testFoldConst("SELECT substring_index('哈哈哈AA+','A', 3)")
     testFoldConst("SELECT substring_index('哈哈哈AA+','A', 4)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
+    testFoldConst("SELECT 
substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")
 
     // trim
     testFoldConst("select trim('11111', 11)")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to