This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 5e1fd2ff35f [fix](Nereids): support Chinese characters set (#28256) (#28357)
5e1fd2ff35f is described below

commit 5e1fd2ff35f541f59e5c51dcfa6d02a608a6ed7d
Author: jakevin <jakevin...@gmail.com>
AuthorDate: Sat Dec 16 19:36:07 2023 +0800

    [fix](Nereids): support Chinese characters set (#28256) (#28357)
---
 .../antlr4/org/apache/doris/nereids/DorisLexer.g4  |  5 ++---
 .../nereids_syntax_p0/chinese_characters_set.out   |  4 ++++
 .../chinese_characters_set.groovy                  | 23 ++++++++++++++++++++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index b39fc69b44f..abf6e4bb27f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -631,9 +631,8 @@ fragment DIGIT
 
 fragment LETTER
     : [a-zA-Z$_] // these are the "java letters" below 0x7F
-    |   // covers all characters above 0x7F which are not a surrogate
-    ~[\u0000-\u007F\uD800-\uDBFF]
-     {Character.isJavaIdentifierStart(_input.LA(-1))}?
+    | ~[\u0000-\u007F\uD800-\uDBFF] // covers all characters above 0x7F which are not a surrogate
+    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
     ;
 
 SIMPLE_COMMENT
diff --git a/regression-test/data/nereids_syntax_p0/chinese_characters_set.out b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out
new file mode 100644
index 00000000000..b66acea9495
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !chinese_characters_set --
+1
+
diff --git a/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy
new file mode 100644
index 00000000000..8cfb1141e47
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("chinese_characters_set") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    qt_chinese_characters_set "SELECT 1 as 中文字符(!。,?;¥"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to