This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new df5bfe8f66b [fix](hyperscan) Fix hyper scan fall back to re2 (#44547)
df5bfe8f66b is described below

commit df5bfe8f66b2fb50285c1c0253abca207f4d7801
Author: zhiqiang <hezhiqi...@selectdb.com>
AuthorDate: Wed Nov 27 10:37:29 2024 +0800

    [fix](hyperscan) Fix hyper scan fall back to re2 (#44547)
    
    
    * Core modification
    When Hyperscan fails to prepare a pattern, we should not call set_error
    on the FunctionContext: set_error tries to cancel the query, whereas in
    this case we actually want to fall back to re2.
    
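    * Refactoring
    Rename the two FunctionRegexp classes so that regexp match
    (FunctionRegexpLike) can be distinguished from the regexp_xxx function
    family such as regexp_extract (FunctionRegexpFunctionality).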
    
    * Reproduce
    ```sql
    SELECT * FROM regexp_test_chinese WHERE city REGEXP "^上海|^北京" ORDER BY id;
    ```
    Note: the `|` in the SQL above is a Chinese character.
---
 be/src/vec/functions/function_regexp.cpp           | 15 +++++-----
 be/src/vec/functions/like.cpp                      | 21 ++++++-------
 be/src/vec/functions/like.h                        |  4 +--
 .../string_functions/test_regexp_chinese.out       |  3 ++
 .../string_functions/test_regexp_chinese.groovy    | 35 ++++++++++++++++++++++
 5 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/be/src/vec/functions/function_regexp.cpp 
b/be/src/vec/functions/function_regexp.cpp
index ae508120cf9..f740cac273c 100644
--- a/be/src/vec/functions/function_regexp.cpp
+++ b/be/src/vec/functions/function_regexp.cpp
@@ -383,12 +383,13 @@ struct RegexpExtractAllImpl {
     }
 };
 
+// template FunctionRegexpFunctionality is used for regexp_xxxx series 
functions, not for regexp match.
 template <typename Impl>
-class FunctionRegexp : public IFunction {
+class FunctionRegexpFunctionality : public IFunction {
 public:
     static constexpr auto name = Impl::name;
 
-    static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); }
+    static FunctionPtr create() { return 
std::make_shared<FunctionRegexpFunctionality>(); }
 
     String get_name() const override { return name; }
 
@@ -486,11 +487,11 @@ public:
 };
 
 void register_function_regexp_extract(SimpleFunctionFactory& factory) {
-    factory.register_function<FunctionRegexp<RegexpReplaceImpl>>();
-    factory.register_function<FunctionRegexp<RegexpExtractImpl<true>>>();
-    factory.register_function<FunctionRegexp<RegexpExtractImpl<false>>>();
-    factory.register_function<FunctionRegexp<RegexpReplaceOneImpl>>();
-    factory.register_function<FunctionRegexp<RegexpExtractAllImpl>>();
+    
factory.register_function<FunctionRegexpFunctionality<RegexpReplaceImpl>>();
+    
factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<true>>>();
+    
factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<false>>>();
+    
factory.register_function<FunctionRegexpFunctionality<RegexpReplaceOneImpl>>();
+    
factory.register_function<FunctionRegexpFunctionality<RegexpExtractAllImpl>>();
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index 631ebb8dbe6..d727ba6e850 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -486,12 +486,10 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* 
context, const char* expres
 
     if (res != HS_SUCCESS) {
         *database = nullptr;
-        if (context) {
-            context->set_error("hs_compile regex pattern error");
-        }
-        return Status::RuntimeError("hs_compile regex pattern error:" +
-                                    std::string(compile_err->message));
+        std::string error_message = compile_err->message;
         hs_free_compile_error(compile_err);
+        // Do not call FunctionContext::set_error here, since we do not want 
to cancel the query here.
+        return Status::RuntimeError<false>("hs_compile regex pattern error:" + 
error_message);
     }
     hs_free_compile_error(compile_err);
 
@@ -499,10 +497,8 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* 
context, const char* expres
         hs_free_database(*database);
         *database = nullptr;
         *scratch = nullptr;
-        if (context) {
-            context->set_error("hs_alloc_scratch allocate scratch space 
error");
-        }
-        return Status::RuntimeError("hs_alloc_scratch allocate scratch space 
error");
+        // Do not call FunctionContext::set_error here, since we do not want 
to cancel the query here.
+        return Status::RuntimeError<false>("hs_alloc_scratch allocate scratch 
space error");
     }
 
     return Status::OK();
@@ -934,7 +930,8 @@ Status FunctionLike::open(FunctionContext* context, 
FunctionContext::FunctionSta
     return Status::OK();
 }
 
-Status FunctionRegexp::open(FunctionContext* context, 
FunctionContext::FunctionStateScope scope) {
+Status FunctionRegexpLike::open(FunctionContext* context,
+                                FunctionContext::FunctionStateScope scope) {
     if (scope != FunctionContext::THREAD_LOCAL) {
         return Status::OK();
     }
@@ -1001,8 +998,8 @@ void register_function_like(SimpleFunctionFactory& 
factory) {
 }
 
 void register_function_regexp(SimpleFunctionFactory& factory) {
-    factory.register_function<FunctionRegexp>();
-    factory.register_alias(FunctionRegexp::name, FunctionRegexp::alias);
+    factory.register_function<FunctionRegexpLike>();
+    factory.register_alias(FunctionRegexpLike::name, 
FunctionRegexpLike::alias);
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
index 229341e7bd1..fed26718603 100644
--- a/be/src/vec/functions/like.h
+++ b/be/src/vec/functions/like.h
@@ -280,12 +280,12 @@ private:
     static void remove_escape_character(std::string* search_string);
 };
 
-class FunctionRegexp : public FunctionLikeBase {
+class FunctionRegexpLike : public FunctionLikeBase {
 public:
     static constexpr auto name = "regexp";
     static constexpr auto alias = "rlike";
 
-    static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); }
+    static FunctionPtr create() { return 
std::make_shared<FunctionRegexpLike>(); }
 
     String get_name() const override { return name; }
 
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out
new file mode 100644
index 00000000000..27bb4af4d8a
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out
@@ -0,0 +1,3 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql_regexp --
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy
new file mode 100644
index 00000000000..900a0a04610
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_regexp_chinese") {
+    sql "DROP TABLE IF EXISTS regexp_test_chinese;"
+    sql """
+        CREATE TABLE regexp_test_chinese (
+            id int NULL DEFAULT "0",
+            city varchar(50) NOT NULL DEFAULT ""
+        ) DISTRIBUTED BY HASH(id) BUCKETS 5 properties("replication_num" = 
"1");
+    """
+
+    sql """
+        INSERT INTO regexp_test_chinese VALUES(1, "上海"),(2, "深圳"),(3, "上海测试"), 
(4, "北京测试");
+    """
+
+    qt_sql_regexp """
+        SELECT * FROM regexp_test_chinese WHERE city REGEXP "^上海|^北京" ORDER BY 
id;
+    """
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to