This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 5e1fd2ff35f [fix](Nereids): support Chinese characters set (#28256) (#28357) 5e1fd2ff35f is described below commit 5e1fd2ff35f541f59e5c51dcfa6d02a608a6ed7d Author: jakevin <jakevin...@gmail.com> AuthorDate: Sat Dec 16 19:36:07 2023 +0800 [fix](Nereids): support Chinese characters set (#28256) (#28357) --- .../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 5 ++--- .../nereids_syntax_p0/chinese_characters_set.out | 4 ++++ .../chinese_characters_set.groovy | 23 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index b39fc69b44f..abf6e4bb27f 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -631,9 +631,8 @@ fragment DIGIT fragment LETTER : [a-zA-Z$_] // these are the "java letters" below 0x7F - | // covers all characters above 0x7F which are not a surrogate - ~[\u0000-\u007F\uD800-\uDBFF] - {Character.isJavaIdentifierStart(_input.LA(-1))}? + | ~[\u0000-\u007F\uD800-\uDBFF] // covers all characters above 0x7F which are not a surrogate + | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF ; SIMPLE_COMMENT diff --git a/regression-test/data/nereids_syntax_p0/chinese_characters_set.out b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out new file mode 100644 index 00000000000..b66acea9495 --- /dev/null +++ b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !chinese_characters_set -- +1 + diff --git a/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy new file mode 100644 index 00000000000..8cfb1141e47 --- /dev/null +++ b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("chinese_characters_set") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + qt_chinese_characters_set "SELECT 1 as 中文字符(!。,?;¥" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org