This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new b1305002039 [fix](Nereids) initcap constant folding should upper first character in all words (#49061) (#49342)
b1305002039 is described below

commit b1305002039faf34ee0f2077c3628ea0926450e3
Author: LiBinfeng <[email protected]>
AuthorDate: Fri Mar 21 18:03:28 2025 +0800

    [fix](Nereids) initcap constant folding should upper first character in all words (#49061) (#49342)
---
 .../functions/executable/StringArithmetic.java     |  11 +-
 .../fold_constant_string_arithmatic.groovy         | 138 +++++++++++++++++++++
 2 files changed, 142 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index 027f75bbc86..532ec04d8aa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -412,12 +412,9 @@ public class StringArithmetic {
         return new IntegerLiteral(first.getValue().length());
     }
 
-    private static boolean isSeparator(char c) {
-        if (".$|()[{^?*+\\".indexOf(c) == -1) {
-            return false;
-        } else {
-            return true;
-        }
+    private static boolean isAlphabetic(char c) {
+        Pattern pattern = Pattern.compile("\\p{Alnum}");
+        return pattern.matcher(String.valueOf(c)).find();
     }
 
     /**
@@ -429,7 +426,7 @@ public class StringArithmetic {
         boolean capitalizeNext = true;
 
         for (char c : first.getValue().toCharArray()) {
-            if (Character.isWhitespace(c) || isSeparator(c)) {
+            if (Character.isWhitespace(c) || !isAlphabetic(c)) {
                 result.append(c);
                 capitalizeNext = true;  // Next character should be capitalized
             } else if (capitalizeNext) {
diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
index 08f9fca9801..8d440289073 100644
--- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
+++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy
@@ -213,6 +213,144 @@ suite("fold_constant_string_arithmatic") {
     testFoldConst("select initcap(' hello world')")
     testFoldConst("select initcap('こんにちは')")
     testFoldConst("select initcap('上海天津北京杭州')")
+    testFoldConst("select initcap('ab')")
+    testFoldConst("select initcap('aBc')")
+    testFoldConst("select initcap('a,b,c')")
+    testFoldConst("select initcap('a;b;c')")
+    testFoldConst("select initcap(null)")
+    testFoldConst("select initcap('')")
+    testFoldConst("select initcap(123)")
+    testFoldConst("select initcap(0)")
+    testFoldConst("select initcap(true)")
+    testFoldConst("select initcap(' a ')")
+    testFoldConst("select initcap('中文字')")
+    testFoldConst("select initcap('🐼abc')")
+    testFoldConst("select initcap('2023-01-01')")
+    testFoldConst("select initcap('aBcDeF')")
+    testFoldConst("select initcap('hello world!')")
+    testFoldConst("select initcap('123abcDEF')")
+    testFoldConst("select initcap(' ')")
+    testFoldConst("select initcap('null')")
+    testFoldConst("select initcap('ärger')")
+    testFoldConst("select initcap('über')")
+    testFoldConst("select initcap('a1!b2@c3#')")
+    testFoldConst("select initcap('john o''connor')")
+    testFoldConst("select initcap('mcdonald''s')")
+    testFoldConst("select initcap('abc-def')")
+    testFoldConst("select initcap('foo_bar')")
+    testFoldConst("select initcap(' test ')")
+    testFoldConst("select initcap('xyz,zyx')")
+    testFoldConst("select initcap('123 456')")
+    testFoldConst("select initcap('.,abc')")
+    testFoldConst("select initcap('[]test')")
+    testFoldConst("select initcap('🐼🐻')")
+    testFoldConst("select initcap('aaAAaa')")
+    testFoldConst("select initcap(substring('abcd', 2))")
+    testFoldConst("select initcap(concat('a', '-test'))")
+    testFoldConst("select initcap('hello world')")
+    testFoldConst("select initcap('mixedCASE')")
+    testFoldConst("select initcap('UPPERCASE')")
+    testFoldConst("select initcap('lowercase')")
+    testFoldConst("select initcap('multiple spaces')")
+    testFoldConst("select initcap('hyphenated-word')")
+    testFoldConst("select initcap('under_score')")
+    testFoldConst("select initcap('dot.test')")
+    testFoldConst("select initcap('colon:test')")
+    testFoldConst("select initcap('semi;test')")
+    testFoldConst("select initcap('quote''test')")
+    testFoldConst("select initcap('slash/test')")
+    testFoldConst("select initcap('back\slash')")
+    testFoldConst("select initcap('emoji🐼test')")
+    testFoldConst("select initcap('数字123test')")
+    testFoldConst("select initcap(' leading space')")
+    testFoldConst("select initcap('trailing space ')")
+    testFoldConst("select initcap(' multiple ')")
+    testFoldConst("select initcap('a.b.c.d')")
+    testFoldConst("select initcap('test-123-test')")
+    testFoldConst("select initcap('mixed_separators-here')")
+    testFoldConst("select initcap('ÄÖÜäöü')")
+    testFoldConst("select initcap('àçèñ')")
+    testFoldConst("select initcap('')")
+    testFoldConst("select initcap(' ')")
+    testFoldConst("select initcap('9am')")
+    testFoldConst("select initcap('sign')")
+    testFoldConst("select initcap('hash#tag')")
+    testFoldConst("select initcap('at@sign')")
+    testFoldConst("select initcap('caret^test')")
+    testFoldConst("select initcap('amp&test')")
+    testFoldConst("select initcap('star*test')")
+    testFoldConst("select initcap('plus+test')")
+    testFoldConst("select initcap('minus-test')")
+    testFoldConst("select initcap('equals=test')")
+    testFoldConst("select initcap('tilde~test')")
+    testFoldConst("select initcap('backtick`test')")
+    testFoldConst("select initcap('pipe|test')")
+    testFoldConst("select initcap('brace{test')")
+    testFoldConst("select initcap('bracket[test')")
+    testFoldConst("select initcap('less<test')")
+    testFoldConst("select initcap('greater>test')")
+    testFoldConst("select initcap('slash/test')")
+    testFoldConst("select initcap('question?test')")
+    testFoldConst("select initcap('space test')")
+    testFoldConst("select initcap('emoji🐼mix')")
+    testFoldConst("select initcap('unicodeñtest')")
+    testFoldConst("select initcap('ÆØÅtest')")
+    testFoldConst("select initcap('çédîñ')")
+    testFoldConst("select initcap('русский')")
+    testFoldConst("select initcap('日本語')")
+    testFoldConst("select initcap('한글')")
+    testFoldConst("select initcap('ﺎﻠﻋﺮﺒﻳﺓ')")
+    testFoldConst("select initcap('😊test')")
+    testFoldConst("select initcap('𝄞music')")
+    testFoldConst("select initcap('🅱button')")
+    testFoldConst("select initcap('🇺🇸flag')")
+    testFoldConst("select initcap('👨👩👧👦family')")
+    testFoldConst("select initcap('🔥fire')")
+    testFoldConst("select initcap('🚀rocket')")
+    testFoldConst("select initcap('📅2023')")
+    testFoldConst("select initcap('√square')")
+    testFoldConst("select initcap('∞infinity')")
+    testFoldConst("select initcap('µmicro')")
+    testFoldConst("select initcap('¶pilcrow')")
+    testFoldConst("select initcap('©copyright')")
+    testFoldConst("select initcap('®registered')")
+    testFoldConst("select initcap('™trademark')")
+    testFoldConst("select initcap('§section')")
+    testFoldConst("select initcap('°degree')")
+    testFoldConst("select initcap('±plusminus')")
+    testFoldConst("select initcap('×multiply')")
+    testFoldConst("select initcap('÷divide')")
+    testFoldConst("select initcap('¹superscript')")
+    testFoldConst("select initcap('₂subscript')")
+    testFoldConst("select initcap('Ωomega')")
+    testFoldConst("select initcap('∆delta')")
+    testFoldConst("select initcap('∑sum')")
+    testFoldConst("select initcap('∏product')")
+    testFoldConst("select initcap('∫integral')")
+    testFoldConst("select initcap('⌘command')")
+    testFoldConst("select initcap('⌥option')")
+    testFoldConst("select initcap('⇧shift')")
+    testFoldConst("select initcap('⌃control')")
+    testFoldConst("select initcap('⌦delete')")
+    testFoldConst("select initcap('⇨arrow')")
+    testFoldConst("select initcap('★star')")
+    testFoldConst("select initcap('☀sun')")
+    testFoldConst("select initcap('☔ umbrella')")
+    testFoldConst("select initcap('☎phone')")
+    testFoldConst("select initcap('✉email')")
+    testFoldConst("select initcap('✓check')")
+    testFoldConst("select initcap('✗cross')")
+    testFoldConst("select initcap('⚠warning')")
+    testFoldConst("select initcap('⏰ clock')")
+    testFoldConst("select initcap('🎂cake')")
+    testFoldConst("select initcap('🎉party')")
+    testFoldConst("select initcap('⚡ bolt')")
+    testFoldConst("select initcap('⛔ forbidden')")
+    testFoldConst("select initcap('✅ check')")
+    testFoldConst("select initcap('✈plane')")
+    testFoldConst("select initcap('❤heart')")
+    testFoldConst("select initcap('⏩ fast')")
+    testFoldConst("select initcap('🔑key')")
 
     // instr
     testFoldConst("select instr('上海天津北京杭州', '北京')")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to