This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 095a2aed994 [fix](Nereids) fold constant for string function process
emoji character by mistake (#49087)
095a2aed994 is described below
commit 095a2aed994bac2e6a668193fa5154beb6eb56e3
Author: LiBinfeng <[email protected]>
AuthorDate: Fri Mar 21 11:43:29 2025 +0800
[fix](Nereids) fold constant for string function process emoji character by
mistake (#49087)
### What problem does this PR solve?
Related PR: #40441
Problem Summary:
The length of emoji characters was calculated incorrectly in some string
functions during constant folding in FE. For example:
select STRLEFT('😊😉👍', 2);
should return 😊😉, but FE returned only 😊 when folding the constant.
Fixed functions:
- left
- strleft
- right
- strright
- locate
- character_length
- split_by_string
- overlay
- replace_empty
---
.../doris/catalog/BuiltinScalarFunctions.java | 8 +-
.../functions/executable/StringArithmetic.java | 125 +++++++-------
.../expressions/functions/scalar/StrLeft.java | 70 --------
.../expressions/functions/scalar/StrRight.java | 70 --------
.../expressions/visitor/ScalarFunctionVisitor.java | 10 --
.../nereids/rules/expression/FoldConstantTest.java | 126 ++++++++++++++
.../string_functions/test_string_function.out | Bin 4892 -> 4890 bytes
.../fold_constant_string_arithmatic.groovy | 188 ++++++++++++++++++++-
8 files changed, 381 insertions(+), 216 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 466503ee47d..e6ce3de8c6f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -416,8 +416,6 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.StPolygonfrom
import org.apache.doris.nereids.trees.expressions.functions.scalar.StX;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StY;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StartsWith;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.StrLeft;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.StrRight;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
@@ -767,7 +765,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(L2Distance.class, "l2_distance"),
scalar(LastDay.class, "last_day"),
scalar(Least.class, "least"),
- scalar(Left.class, "left"),
+ scalar(Left.class, "left", "strleft"),
scalar(Length.class, "length"),
scalar(Crc32.class, "crc32"),
scalar(Like.class, "like"),
@@ -852,7 +850,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(Replace.class, "replace"),
scalar(ReplaceEmpty.class, "replace_empty"),
scalar(Reverse.class, "reverse"),
- scalar(Right.class, "right"),
+ scalar(Right.class, "right", "strright"),
scalar(Round.class, "round"),
scalar(RoundBankers.class, "round_bankers"),
scalar(Rpad.class, "rpad"),
@@ -913,8 +911,6 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(StY.class, "st_y"),
scalar(StartsWith.class, "starts_with"),
scalar(Strcmp.class, "strcmp"),
- scalar(StrLeft.class, "strleft"),
- scalar(StrRight.class, "strright"),
scalar(StrToDate.class, "str_to_date"),
scalar(SubBitmap.class, "sub_bitmap"),
scalar(SubReplace.class, "sub_replace"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index 06cee19345e..082387d52ca 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -41,6 +41,7 @@ import
org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
import org.apache.doris.nereids.types.ArrayType;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
@@ -60,7 +61,7 @@ import java.util.regex.Pattern;
* concat
*/
public class StringArithmetic {
- private static Expression castStringLikeLiteral(StringLikeLiteral first,
String value) {
+ private static Literal castStringLikeLiteral(StringLikeLiteral first,
String value) {
if (first instanceof StringLiteral) {
return new StringLiteral(value);
} else if (first instanceof VarcharLiteral) {
@@ -79,7 +80,7 @@ public class StringArithmetic {
}
private static String substringImpl(String first, int second, int third) {
- int stringLength = first.length();
+ int stringLength = first.codePointCount(0, first.length());
if (stringLength == 0) {
return "";
}
@@ -101,8 +102,11 @@ public class StringArithmetic {
} else {
rightIndex = third + leftIndex;
}
+ // at here leftIndex and rightIndex can not be exceeding boundary
+ int finalLeftIndex = first.codePointCount(0, (int) leftIndex);
+ int finalRightIndex = first.codePointCount(0, (int) rightIndex);
// left index and right index are in integer range because of
definition, so we can safely cast it to int
- return first.substring((int) leftIndex, (int) rightIndex);
+ return first.substring(finalLeftIndex, finalRightIndex);
}
/**
@@ -293,12 +297,14 @@ public class StringArithmetic {
*/
@ExecFunction(name = "left")
public static Expression left(StringLikeLiteral first, IntegerLiteral
second) {
+ int inputLength = first.getValue().codePointCount(0,
first.getValue().length());
if (second.getValue() <= 0) {
return castStringLikeLiteral(first, "");
- } else if (second.getValue() < first.getValue().length()) {
- return castStringLikeLiteral(first, first.getValue().substring(0,
second.getValue()));
- } else {
+ } else if (second.getValue() >= inputLength) {
return first;
+ } else {
+ int index = first.getValue().codePointCount(0, second.getValue());
+ return castStringLikeLiteral(first, first.getValue().substring(0,
index));
}
}
@@ -307,17 +313,20 @@ public class StringArithmetic {
*/
@ExecFunction(name = "right")
public static Expression right(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() < (- first.getValue().length()) ||
Math.abs(second.getValue()) == 0) {
+ int inputLength = first.getValue().codePointCount(0,
first.getValue().length());
+ if (second.getValue() < (- inputLength) || Math.abs(second.getValue())
== 0) {
return castStringLikeLiteral(first, "");
- } else if (second.getValue() > first.getValue().length()) {
+ } else if (second.getValue() >= inputLength) {
return first;
} else {
- if (second.getValue() > 0) {
+ if (second.getValue() >= 0) {
+ int index = first.getValue().codePointCount(0,
second.getValue());
return castStringLikeLiteral(first, first.getValue().substring(
- first.getValue().length() - second.getValue(),
first.getValue().length()));
+ inputLength - index, inputLength));
} else {
+ int index =
first.getValue().codePointCount(Math.abs(second.getValue()) - 1,
first.getValue().length());
return castStringLikeLiteral(first, first.getValue().substring(
- Math.abs(second.getValue()) - 1,
first.getValue().length()));
+ Math.abs(index) - 1, inputLength));
}
}
}
@@ -337,7 +346,7 @@ public class StringArithmetic {
public static Expression locate(StringLikeLiteral first, StringLikeLiteral
second, IntegerLiteral third) {
int result = second.getValue().indexOf(first.getValue()) + 1;
if (third.getValue() <= 0 || !substringImpl(second.getValue(),
third.getValue(),
- second.getValue().length()).contains(first.getValue())) {
+ second.getValue().codePointCount(0,
second.getValue().length())).contains(first.getValue())) {
result = 0;
}
return new IntegerLiteral(result);
@@ -408,7 +417,7 @@ public class StringArithmetic {
*/
@ExecFunction(name = "character_length")
public static Expression characterLength(StringLikeLiteral first) {
- return new IntegerLiteral(first.getValue().length());
+ return new IntegerLiteral(first.getValue().codePointCount(0,
first.getValue().length()));
}
private static boolean isAlphabetic(char c) {
@@ -663,6 +672,23 @@ public class StringArithmetic {
return new VarcharLiteral(sb.toString());
}
+ /**
+ * split by char by empty string considering emoji
+ * @param str input string to be split
+ * @return ArrayLiteral
+ */
+ public static List<String> splitByGrapheme(StringLikeLiteral str) {
+ List<String> result =
Lists.newArrayListWithExpectedSize(str.getValue().length());
+ int length = str.getValue().length();
+ for (int i = 0; i < length; ) {
+ int codePoint = str.getValue().codePointAt(i);
+ int charCount = Character.charCount(codePoint);
+ result.add(new String(new int[]{codePoint}, 0, 1));
+ i += charCount;
+ }
+ return result;
+ }
+
/**
* Executable arithmetic functions split_by_string
*/
@@ -671,11 +697,17 @@ public class StringArithmetic {
if (first.getValue().isEmpty()) {
return new ArrayLiteral(ImmutableList.of(),
ArrayType.of(first.getDataType()));
}
- int limit = second.getValue().isEmpty() ? 0 : -1;
- String[] result =
first.getValue().split(Pattern.quote(second.getValue()), limit);
+ if (second.getValue().isEmpty()) {
+ List<Literal> result =
Lists.newArrayListWithExpectedSize(first.getValue().length());
+ for (String resultStr : splitByGrapheme(first)) {
+ result.add(castStringLikeLiteral(first, resultStr));
+ }
+ return new ArrayLiteral(result);
+ }
+ String[] result =
first.getValue().split(Pattern.quote(second.getValue()), -1);
List<Literal> items = new ArrayList<>();
for (String s : result) {
- items.add((Literal) castStringLikeLiteral(first, s));
+ items.add(castStringLikeLiteral(first, s));
}
return new ArrayLiteral(items);
}
@@ -772,40 +804,6 @@ public class StringArithmetic {
}
}
- /**
- * Executable arithmetic functions strLeft
- */
- @ExecFunction(name = "strleft")
- public static Expression strLeft(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() <= 0) {
- return castStringLikeLiteral(first, "");
- } else if (second.getValue() > first.getValue().length()) {
- return first;
- } else {
- return castStringLikeLiteral(first, first.getValue().substring(0,
second.getValue()));
- }
- }
-
- /**
- * Executable arithmetic functions strRight
- */
- @ExecFunction(name = "strright")
- public static Expression strRight(StringLikeLiteral first, IntegerLiteral
second) {
- if (second.getValue() < (- first.getValue().length()) ||
Math.abs(second.getValue()) == 0) {
- return castStringLikeLiteral(first, "");
- } else if (second.getValue() > first.getValue().length()) {
- return first;
- } else {
- if (second.getValue() > 0) {
- return castStringLikeLiteral(first, first.getValue().substring(
- first.getValue().length() - second.getValue(),
first.getValue().length()));
- } else {
- return castStringLikeLiteral(first, first.getValue().substring(
- Math.abs(second.getValue()) - 1,
first.getValue().length()));
- }
- }
- }
-
/**
* Executable arithmetic functions overlay
*/
@@ -813,18 +811,18 @@ public class StringArithmetic {
public static Expression overlay(StringLikeLiteral originStr,
IntegerLiteral pos, IntegerLiteral len, StringLikeLiteral
insertStr) {
StringBuilder sb = new StringBuilder();
- if (pos.getValue() <= 0 || pos.getValue() >
originStr.getValue().length()) {
+ int totalLength = originStr.getValue().codePointCount(0,
originStr.getValue().length());
+ if (pos.getValue() <= 0 || pos.getValue() > totalLength) {
return originStr;
} else {
- if (len.getValue() < 0 || (pos.getValue() + len.getValue()) >
originStr.getValue().length()) {
- sb.append(originStr.getValue().substring(0, pos.getValue() -
1));
+ if (len.getValue() < 0 || len.getValue() > (totalLength -
pos.getValue())) {
+ sb.append(substringImpl(originStr.getValue(), 1,
pos.getValue() - 1));
sb.append(insertStr.getValue());
return castStringLikeLiteral(originStr, sb.toString());
} else {
- sb.append(originStr.getValue().substring(0, pos.getValue() -
1));
+ sb.append(substringImpl(originStr.getValue(), 1,
pos.getValue() - 1));
sb.append(insertStr.getValue());
- sb.append(originStr.getValue().substring(pos.getValue()
- + len.getValue() - 1, originStr.getValue().length()));
+ sb.append(substringImpl(originStr.getValue(), pos.getValue() +
len.getValue(), totalLength));
return castStringLikeLiteral(originStr, sb.toString());
}
}
@@ -946,7 +944,7 @@ public class StringArithmetic {
*/
@ExecFunction(name = "append_trailing_char_if_absent")
public static Expression appendTrailingCharIfAbsent(StringLikeLiteral
first, StringLikeLiteral second) {
- if (second.getValue().length() != 1) {
+ if (second.getValue().codePointCount(0, second.getValue().length()) !=
1) {
return new NullLiteral(first.getDataType());
}
if (first.getValue().endsWith(second.getValue())) {
@@ -1010,6 +1008,19 @@ public class StringArithmetic {
*/
@ExecFunction(name = "replace_empty")
public static Expression replaceEmpty(StringLikeLiteral first,
StringLikeLiteral second, StringLikeLiteral third) {
+ if (second.getValue().isEmpty()) {
+ if (first.getValue().isEmpty()) {
+ return castStringLikeLiteral(first, third.getValue());
+ }
+ List<String> inputs = splitByGrapheme(first);
+ StringBuilder sb = new StringBuilder();
+ sb.append(third.getValue());
+ for (String input : inputs) {
+ sb.append(input);
+ sb.append(third.getValue());
+ }
+ return castStringLikeLiteral(first, sb.toString());
+ }
return castStringLikeLiteral(first,
first.getValue().replace(second.getValue(), third.getValue()));
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrLeft.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrLeft.java
deleted file mode 100644
index e8188dbc0f3..00000000000
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrLeft.java
+++ /dev/null
@@ -1,70 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.nereids.trees.expressions.functions.scalar;
-
-import org.apache.doris.catalog.FunctionSignature;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
-import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
-import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
-import org.apache.doris.nereids.types.IntegerType;
-import org.apache.doris.nereids.types.StringType;
-import org.apache.doris.nereids.types.VarcharType;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
-import java.util.List;
-
-/**
- * ScalarFunction 'strleft'. This class is generated by GenerateFunction.
- */
-public class StrLeft extends ScalarFunction
- implements BinaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
-
- public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT,
IntegerType.INSTANCE),
-
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE,
IntegerType.INSTANCE));
-
- /**
- * constructor with 2 arguments.
- */
- public StrLeft(Expression arg0, Expression arg1) {
- super("strleft", arg0, arg1);
- }
-
- /**
- * withChildren.
- */
- @Override
- public StrLeft withChildren(List<Expression> children) {
- Preconditions.checkArgument(children.size() == 2);
- return new StrLeft(children.get(0), children.get(1));
- }
-
- @Override
- public List<FunctionSignature> getSignatures() {
- return SIGNATURES;
- }
-
- @Override
- public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
- return visitor.visitStrLeft(this, context);
- }
-}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrRight.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrRight.java
deleted file mode 100644
index 0cb563ce94f..00000000000
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrRight.java
+++ /dev/null
@@ -1,70 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.nereids.trees.expressions.functions.scalar;
-
-import org.apache.doris.catalog.FunctionSignature;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
-import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
-import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
-import org.apache.doris.nereids.types.IntegerType;
-import org.apache.doris.nereids.types.StringType;
-import org.apache.doris.nereids.types.VarcharType;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
-import java.util.List;
-
-/**
- * ScalarFunction 'strright'. This class is generated by GenerateFunction.
- */
-public class StrRight extends ScalarFunction
- implements BinaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
-
- public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT,
IntegerType.INSTANCE),
-
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE,
IntegerType.INSTANCE));
-
- /**
- * constructor with 2 arguments.
- */
- public StrRight(Expression arg0, Expression arg1) {
- super("strright", arg0, arg1);
- }
-
- /**
- * withChildren.
- */
- @Override
- public StrRight withChildren(List<Expression> children) {
- Preconditions.checkArgument(children.size() == 2);
- return new StrRight(children.get(0), children.get(1));
- }
-
- @Override
- public List<FunctionSignature> getSignatures() {
- return SIGNATURES;
- }
-
- @Override
- public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
- return visitor.visitStrRight(this, context);
- }
-}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index d763087d163..c4589cfe02c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -414,8 +414,6 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.StPolygonfrom
import org.apache.doris.nereids.trees.expressions.functions.scalar.StX;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StY;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StartsWith;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.StrLeft;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.StrRight;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
@@ -2033,14 +2031,6 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(startsWith, context);
}
- default R visitStrLeft(StrLeft strLeft, C context) {
- return visitScalarFunction(strLeft, context);
- }
-
- default R visitStrRight(StrRight strRight, C context) {
- return visitScalarFunction(strRight, context);
- }
-
default R visitStrToDate(StrToDate strToDate, C context) {
return visitScalarFunction(strToDate, context);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
index 3ceed38aaca..700772ffa1b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java
@@ -40,6 +40,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Asin;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Bin;
import org.apache.doris.nereids.trees.expressions.functions.scalar.BitCount;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Ceil;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.CharacterLength;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Coalesce;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ConvertTz;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
@@ -49,16 +50,22 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Exp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Floor;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.FromUnixtime;
import org.apache.doris.nereids.trees.expressions.functions.scalar.HoursAdd;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Ln;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Locate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Log;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MinutesAdd;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Overlay;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Power;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.ReplaceEmpty;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Right;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Round;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sign;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sin;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Sqrt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToDate;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tan;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ToDays;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp;
@@ -335,6 +342,125 @@ class FoldConstantTest extends
ExpressionRewriteTestHelper {
AppendTrailingCharIfAbsent a = new
AppendTrailingCharIfAbsent(StringLiteral.of("1"), StringLiteral.of("3"));
rewritten = executor.rewrite(a, context);
Assertions.assertEquals(new StringLiteral("13"), rewritten);
+
+ Left left = new Left(StringLiteral.of("hello world"),
IntegerLiteral.of(5));
+ rewritten = executor.rewrite(left, context);
+ Assertions.assertEquals(new StringLiteral("hello"), rewritten);
+ left = new Left(StringLiteral.of("test"), IntegerLiteral.of(10));
+ rewritten = executor.rewrite(left, context);
+ Assertions.assertEquals(new StringLiteral("test"), rewritten);
+ left = new Left(StringLiteral.of("data"), IntegerLiteral.of(0));
+ rewritten = executor.rewrite(left, context);
+ Assertions.assertEquals(new StringLiteral(""), rewritten);
+ left = new Left(StringLiteral.of("data"), IntegerLiteral.of(-3));
+ rewritten = executor.rewrite(left, context);
+ Assertions.assertEquals(new StringLiteral(""), rewritten);
+
+ Right right = new Right(StringLiteral.of("hello world"),
IntegerLiteral.of(5));
+ rewritten = executor.rewrite(right, context);
+ Assertions.assertEquals(new StringLiteral("world"), rewritten);
+ right = new Right(StringLiteral.of("test"), IntegerLiteral.of(10));
+ rewritten = executor.rewrite(right, context);
+ Assertions.assertEquals(new StringLiteral("test"), rewritten);
+ right = new Right(StringLiteral.of("data"), IntegerLiteral.of(0));
+ rewritten = executor.rewrite(right, context);
+ Assertions.assertEquals(new StringLiteral(""), rewritten);
+ right = new Right(StringLiteral.of("data"), IntegerLiteral.of(-3));
+ rewritten = executor.rewrite(right, context);
+ Assertions.assertEquals(new StringLiteral("ata"), rewritten);
+
+ Substring substr = new Substring(
+ StringLiteral.of("database"),
+ IntegerLiteral.of(1),
+ IntegerLiteral.of(4)
+ );
+ rewritten = executor.rewrite(substr, context);
+ Assertions.assertEquals(new StringLiteral("data"), rewritten);
+ substr = new Substring(
+ StringLiteral.of("database"),
+ IntegerLiteral.of(-4),
+ IntegerLiteral.of(4)
+ );
+ rewritten = executor.rewrite(substr, context);
+ Assertions.assertEquals(new StringLiteral("base"), rewritten);
+ substr = new Substring(
+ StringLiteral.of("example"),
+ IntegerLiteral.of(3),
+ IntegerLiteral.of(10)
+ );
+ rewritten = executor.rewrite(substr, context);
+ Assertions.assertEquals(new StringLiteral("ample"), rewritten);
+
+ Locate locate = new Locate(
+ StringLiteral.of("world"),
+ StringLiteral.of("hello world")
+ );
+ rewritten = executor.rewrite(locate, context);
+ Assertions.assertEquals(new IntegerLiteral(7), rewritten);
+ locate = new Locate(
+ StringLiteral.of("test"),
+ StringLiteral.of("hello world")
+ );
+ rewritten = executor.rewrite(locate, context);
+ Assertions.assertEquals(new IntegerLiteral(0), rewritten);
+ locate = new Locate(
+ StringLiteral.of("l"),
+ StringLiteral.of("hello world"),
+ IntegerLiteral.of(3)
+ );
+ rewritten = executor.rewrite(locate, context);
+ Assertions.assertEquals(new IntegerLiteral(3), rewritten);
+
+ CharacterLength len = new CharacterLength(StringLiteral.of("hello"));
+ rewritten = executor.rewrite(len, context);
+ Assertions.assertEquals(new IntegerLiteral(5), rewritten);
+ len = new CharacterLength(StringLiteral.of(""));
+ rewritten = executor.rewrite(len, context);
+ Assertions.assertEquals(new IntegerLiteral(0), rewritten);
+ len = new CharacterLength(StringLiteral.of("😊"));
+ rewritten = executor.rewrite(len, context);
+ Assertions.assertEquals(new IntegerLiteral(1), rewritten);
+
+ Overlay overlay = new Overlay(
+ StringLiteral.of("snow"),
+ IntegerLiteral.of(2),
+ IntegerLiteral.of(2),
+ StringLiteral.of("new")
+ );
+ rewritten = executor.rewrite(overlay, context);
+ Assertions.assertEquals(new StringLiteral("sneww"), rewritten);
+ overlay = new Overlay(
+ StringLiteral.of("snow"),
+ IntegerLiteral.of(2),
+ IntegerLiteral.of(0),
+ StringLiteral.of("n")
+ );
+ rewritten = executor.rewrite(overlay, context);
+ Assertions.assertEquals(new StringLiteral("snnow"), rewritten);
+ overlay = new Overlay(
+ StringLiteral.of("snow"),
+ IntegerLiteral.of(2),
+ IntegerLiteral.of(-1),
+ StringLiteral.of("n")
+ );
+ rewritten = executor.rewrite(overlay, context);
+ Assertions.assertEquals(new StringLiteral("sn"), rewritten);
+ overlay = new Overlay(
+ StringLiteral.of("snow"),
+ IntegerLiteral.of(-1),
+ IntegerLiteral.of(3),
+ StringLiteral.of("n")
+ );
+ rewritten = executor.rewrite(overlay, context);
+ Assertions.assertEquals(new StringLiteral("snow"), rewritten);
+
+ ReplaceEmpty replace = new ReplaceEmpty(
+ StringLiteral.of(""),
+ StringLiteral.of(""),
+ StringLiteral.of("default")
+ );
+ rewritten = executor.rewrite(replace, context);
+ Assertions.assertEquals(new StringLiteral("default"), rewritten);
}
@Test
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 4af2997eda2..dacc36966a2 100644
Binary files
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
and
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
differ
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index fc09cbff786..76de337c7a9 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -259,7 +259,6 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select initcap('semi;test')")
testFoldConst("select initcap('quote''test')")
testFoldConst("select initcap('slash/test')")
- testFoldConst("select initcap('back\slash')")
testFoldConst("select initcap('emoji<d83d><dc3c>test')")
testFoldConst("select initcap('数字123test')")
testFoldConst("select initcap(' leading space')")
@@ -518,7 +517,77 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select overlay('עברית', 1, 1, '😀')")
testFoldConst("select overlay('a😀bc', 2, 1, 'x')")
testFoldConst("select overlay('日本語', 2, 2, 'xyz')")
-
+ testFoldConst("select overlay('abc', 1, 1, 'x')")
+ testFoldConst("select overlay('abc', 2, 1, 'x')")
+ testFoldConst("select overlay('abc', 3, 1, 'x')")
+ testFoldConst("select overlay('abc', 1, 3, 'xyz')")
+ testFoldConst("select overlay('abc', 0, 1, 'x')") // 越界
+ testFoldConst("select overlay('abc', -1, 1, 'x')") // 越界
+ testFoldConst("select overlay(null, 1, 1, 'x')") // null 原始字符串
+ testFoldConst("select overlay('abc', null, 1, 'x')") // null 起始位置
+ testFoldConst("select overlay('abc', 1, null, 'x')") // null 子串长度
+ testFoldConst("select overlay('abc', 1, 1, null)") // null 新字符串
+ testFoldConst("select overlay('a😀bc', 2, 1, 'x')")
+ testFoldConst("select overlay('αβγ', 1, 1, 'x')")
+ testFoldConst("select overlay('中文', 1, 1, 'x')")
+ testFoldConst("select overlay('日本語', 1, 1, 'x')")
+ testFoldConst("select overlay('한국어', 1, 1, 'x')")
+ testFoldConst("select overlay('русский', 1, 1, 'x')")
+ testFoldConst("select overlay('עברית', 1, 1, 'x')")
+ testFoldConst("select overlay('a😀bc', 2, 2, 'xyz')")
+ testFoldConst("select overlay('αβγ', 2, 2, 'xyz')")
+ testFoldConst("select overlay('中文', 2, 2, 'xyz')")
+ testFoldConst("select overlay('日本語', 2, 2, 'xyz')")
+ testFoldConst("select overlay('한국어', 2, 2, 'xyz')")
+ testFoldConst("select overlay('русский', 2, 2, 'xyz')")
+ testFoldConst("select overlay('עברית', 2, 2, 'xyz')")
+ testFoldConst("select overlay('abc', 1, 1, '😀')")
+ testFoldConst("select overlay('abc', 1, 1, 'α')")
+ testFoldConst("select overlay('abc', 1, 1, '中')")
+ testFoldConst("select overlay('abc', 1, 1, '日')")
+ testFoldConst("select overlay('abc', 1, 1, '한')")
+ testFoldConst("select overlay('abc', 1, 1, 'р')")
+ testFoldConst("select overlay('abc', 1, 1, 'ע')")
+ testFoldConst("select overlay('a😀bc', 1, 1, 'α')")
+ testFoldConst("select overlay('αβγ', 1, 1, '中')")
+ testFoldConst("select overlay('中文', 1, 1, '日')")
+ testFoldConst("select overlay('日本語', 1, 1, '한')")
+ testFoldConst("select overlay('한국어', 1, 1, 'р')")
+ testFoldConst("select overlay('русский', 1, 1, 'ע')")
+ testFoldConst("select overlay('עברית', 1, 1, '😀')")
+ testFoldConst("select overlay('abc', -1, 1, 'x')") // 负数起始位置
+ testFoldConst("select overlay('abc', 1, -1, 'x')") // 负数子串长度
+ testFoldConst("select overlay('abc', 1, 10, 'xyz')") // 子串长度越界
+ testFoldConst("select overlay('abc', 4, 1, 'x')") // 起始位置越界
+ testFoldConst("select overlay('abc', 1, 1, 'xyzw')") // 新字符串长度大于替换长度
+ testFoldConst("select overlay('a😀bc', 1, 1, 'αβγ')") // 新字符串包含多字符
+ testFoldConst("select overlay('αβγ', 1, 1, '中文')") // 新字符串包含多字符
+ testFoldConst("select overlay('中文', 1, 1, '日本語')") // 新字符串包含多字符
+ testFoldConst("select overlay('日本語', 1, 1, '한국어')") // 新字符串包含多字符
+ testFoldConst("select overlay('한국어', 1, 1, 'русский')") // 新字符串包含多字符
+ testFoldConst("select overlay('русский', 1, 1, 'עברית')") // 新字符串包含多字符
+ testFoldConst("select overlay('עברית', 1, 1, 'a😀bc')") // 新字符串包含多字符
+ testFoldConst("select overlay('', 1, 1, 'x')") // 空字符串
+ testFoldConst("select overlay('abc', 1, 0, 'x')") // 子串长度为0
+ testFoldConst("select overlay('abc', 1, 1, '')") // 新字符串为空
+ testFoldConst("select overlay('a😀bc', 1, 0, 'x')") // 子串长度为0,含emoji
+ testFoldConst("select overlay('αβγ', 1, 0, 'x')") // 子串长度为0,含希腊字符
+ testFoldConst("select overlay('中文', 1, 0, 'x')") // 子串长度为0,含中文
+ testFoldConst("select overlay('日本語', 1, 0, 'x')") // 子串长度为0,含日文
+ testFoldConst("select overlay('한국어', 1, 0, 'x')") // 子串长度为0,含韩文
+ testFoldConst("select overlay('русский', 1, 0, 'x')") // 子串长度为0,含俄文
+ testFoldConst("select overlay('עברית', 1, 0, 'x')") // 子串长度为0,含希伯来文
+ testFoldConst("select overlay('abc', 1, 1, '😀α中文日한俄ע')") // 新字符串包含所有字符集
+ testFoldConst("select overlay('😀α中文日한俄ע', 1, 1, 'abc')") // 原始字符串包含所有字符集
+ testFoldConst("select overlay('abc', 1, 1, '😀α')") // 新字符串包含emoji和希腊字符
+ testFoldConst("select overlay('😀α', 1, 1, 'abc')") // 原始字符串包含emoji和希腊字符
+ testFoldConst("select overlay('中文日한俄ע', 1, 1, 'abc')") // 原始字符串包含多语言字符
+ testFoldConst("select overlay('abc', 1, 1, '中文日한俄ע')") // 新字符串包含多语言字符
+ testFoldConst("select overlay('abc', 1, 2147483647, '中文日한俄ע')")
+ testFoldConst("select overlay('abc', 1, 2147483648, '中文日한俄ע')")
+ testFoldConst("select overlay('abc', -2147483647, 1, '中文日한俄ע')")
+ testFoldConst("select overlay('abc', -2147483648, 1, '中文日한俄ע')")
+
// parse_url
testFoldConst("select parse_url(cast('http://www.example.com/path?query=abc' as string), cast('HOST' as string))")
testFoldConst("select parse_url('http://www.example.com/path?query=abc', 'HOST')")
@@ -954,6 +1023,60 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select strleft(' Hello World', 5)")
testFoldConst("select strleft('Hello World ', 50)")
testFoldConst("select strleft(NULL, 1)")
+ testFoldConst("select strleft('😊😉👍', 2)")
+ testFoldConst("select strleft('αβγδ', 3)")
+ testFoldConst("select strleft('你好世界', 4)")
+ testFoldConst("select strleft('こんにちは世界', 5)")
+ testFoldConst("select strleft('안녕하세요', 3)")
+ testFoldConst("select strleft('привет', 4)")
+ testFoldConst("select strleft('שלום', 3)")
+ testFoldConst("select strleft('😊😉👍😊😉', 4)")
+ testFoldConst("select strleft('αβγδεζ', 4)")
+ testFoldConst("select strleft('你好呀,世界', 6)")
+ testFoldConst("select strleft('こんにちは、素晴らしい一日', 7)")
+ testFoldConst("select strleft('안녕하세요 여러분', 5)")
+ testFoldConst("select strleft('привет мир', 6)")
+ testFoldConst("select strleft('שלום עולם', 4)")
+ testFoldConst("select strleft(null, 2)")
+ testFoldConst("select strleft('😊😉👍😊😉👍', 0)")
+ testFoldConst("select strleft('αβγδεζη', -1)")
+ testFoldConst("select strleft('你好,美好的一天', -2)")
+ testFoldConst("select strleft('こんにちは、素晴らしい一日', -3)")
+ testFoldConst("select strleft('안녕하세요 여러분 안녕히가세요', -4)")
+ testFoldConst("select strleft('привет всем друзьям', -5)")
+ testFoldConst("select strleft('שלום לכל החברים', -3)")
+ testFoldConst("select strleft('', 2)")
+ testFoldConst("select strleft('😊😉', -1)")
+ testFoldConst("select strleft('αβ', 0)")
+ testFoldConst("select strleft('你好', -1)")
+ testFoldConst("select strleft('こんにちは', 0)")
+ testFoldConst("select strleft('안녕하세요', -1)")
+ testFoldConst("select strleft('привет', 0)")
+ testFoldConst("select strleft('שלום', -1)")
+ testFoldConst("select strleft('😊😉👍😊😉👍😊', 5)")
+ testFoldConst("select strleft('αβγδεζηθ', 5)")
+ testFoldConst("select strleft('你好,世界!欢迎', 6)")
+ testFoldConst("select strleft('こんにちは、世界!ようこそ', 7)")
+ testFoldConst("select strleft('안녕하세요 세계!', 5)")
+ testFoldConst("select strleft('привет, мир!', 6)")
+ testFoldConst("select strleft('שלום עולם!', 4)")
+ testFoldConst("select strleft('😊😉👍😊😉👍😊😉', 6)")
+ testFoldConst("select strleft('αβγδεζηθι', 6)")
+ testFoldConst("select strleft('你好呀,美好的世界', 7)")
+ testFoldConst("select strleft('こんにちは、素晴らしい世界よ', 8)")
+ testFoldConst("select strleft('안녕하세요, 아름다운 세상', 7)")
+ testFoldConst("select strleft('привет, прекрасный мир', 8)")
+ testFoldConst("select strleft('שלום לעולם יפה', 5)")
+ testFoldConst("select strleft('', -1)")
+ testFoldConst("select strleft('😊😉', 0)")
+ testFoldConst("select strleft('αβ', -1)")
+ testFoldConst("select strleft('你好', 0)")
+ testFoldConst("select strleft('こんにちは', -1)")
+ testFoldConst("select strleft('안녕하세요', 0)")
+ testFoldConst("select strleft('привет', -1)")
+ testFoldConst("select strleft('שלום', 0)")
+ testFoldConst("select strleft('привет', 2147483647)")
+ testFoldConst("select strleft('привет', 2147483648)")
// strright
testFoldConst("select strright('good morning', NULL)")
@@ -964,7 +1087,61 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select strright(' Hello World', 5)")
testFoldConst("select strright('Hello World ', 5)")
testFoldConst("select strright(NULL, 1)")
-
+ testFoldConst("select strright('😊😉👍', 2)")
+ testFoldConst("select strright('αβγδ', 3)")
+ testFoldConst("select strright('你好世界', 2)")
+ testFoldConst("select strright('こんにちは世界', 3)")
+ testFoldConst("select strright('안녕하세요', 3)")
+ testFoldConst("select strright('привет', 4)")
+ testFoldConst("select strright('שלום', 3)")
+ testFoldConst("select strright('😊😉👍😊😉', 4)")
+ testFoldConst("select strright('αβγδεζ', 4)")
+ testFoldConst("select strright('你好呀,世界', 6)")
+ testFoldConst("select strright('こんにちは、素晴らしい一日', 7)")
+ testFoldConst("select strright('안녕하세요 여러분', 5)")
+ testFoldConst("select strright('привет мир', 6)")
+ testFoldConst("select strright('שלום לכל החברים', 10)")
+ testFoldConst("select strright(null, 2)")
+ testFoldConst("select strright('😊😉👍😊😉👍', 0)")
+ testFoldConst("select strright('αβγδεζη', -1)")
+ testFoldConst("select strright('你好,美好的一天', -2)")
+ testFoldConst("select strright('こんにちは、素晴らしい一日', -3)")
+ testFoldConst("select strright('안녕하세요 여러분 안녕히가세요', -4)")
+ testFoldConst("select strright('привет всем друзьям', -5)")
+ testFoldConst("select strright('שלום עולם!', -3)")
+ testFoldConst("select strright('', 2)")
+ testFoldConst("select strright('😊😉', -1)")
+ testFoldConst("select strright('αβ', 0)")
+ testFoldConst("select strright('你好', -1)")
+ testFoldConst("select strright('こんにちは', 0)")
+ testFoldConst("select strright('안녕하세요', -1)")
+ testFoldConst("select strright('привет', 0)")
+ testFoldConst("select strright('שלום', -1)")
+ testFoldConst("select strright('😊😉👍😊😉👍😊', 5)")
+ testFoldConst("select strright('αβγδεζηθ', 5)")
+ testFoldConst("select strright('你好,世界!欢迎', 6)")
+ testFoldConst("select strright('こんにちは、世界!ようこそ', 7)")
+ testFoldConst("select strright('안녕하세요 세계!', 5)")
+ testFoldConst("select strright('привет, мир!', 6)")
+ testFoldConst("select strright('שלום עולם!', 4)")
+ testFoldConst("select strright('😊😉👍😊😉👍😊😉', 6)")
+ testFoldConst("select strright('αβγδεζηθι', 6)")
+ testFoldConst("select strright('你好呀,美好的世界', 7)")
+ testFoldConst("select strright('こんにちは、素晴らしい世界よ', 8)")
+ testFoldConst("select strright('안녕하세요, 아름다운 세상', 7)")
+ testFoldConst("select strright('привет, прекрасный мир', 8)")
+ testFoldConst("select strright('שלום לעולם יפה', 5)")
+ testFoldConst("select strright('', -1)")
+ testFoldConst("select strright('😊😉', 0)")
+ testFoldConst("select strright('αβ', -1)")
+ testFoldConst("select strright('你好', 0)")
+ testFoldConst("select strright('こんにちは', -1)")
+ testFoldConst("select strright('안녕하세요', 0)")
+ testFoldConst("select strright('привет', -1)")
+ testFoldConst("select strright('שלום', 0)")
+ testFoldConst("select strright('привет', 2147483647)")
+ testFoldConst("select strright('привет', 2147483648)")
+
// sub_replace
testFoldConst("select sub_replace(CAST('doris' AS STRING), CAST('***' AS STRING), 1, 2)")
testFoldConst("select sub_replace(CAST('doris' AS STRING), CAST('***' AS STRING), 1, 2)")
@@ -1541,5 +1718,10 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select
extract_url_parameter('http://user:[email protected]?🌍=b&c=d&e=f&g=h&i=j&k=l',
'')")
testFoldConst("select
extract_url_parameter('http://user:[email protected]?🌍=b&c=d&e=f&g=h&i=j&k=l',
null)")
+ // emoji
+ testFoldConst("select replace_empty('😀abc', '', 'def')")
+ testFoldConst("select split_by_string('a😁a😁a', '')")
+ testFoldConst("select character_length('a😁a😁a')")
+ testFoldConst("select replace_empty('a😁a😁a', '', '2')")
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]