This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new df5bfe8f66b [fix](hyperscan) Fix hyper scan fall back to re2 (#44547) df5bfe8f66b is described below commit df5bfe8f66b2fb50285c1c0253abca207f4d7801 Author: zhiqiang <hezhiqi...@selectdb.com> AuthorDate: Wed Nov 27 10:37:29 2024 +0800 [fix](hyperscan) Fix hyper scan fall back to re2 (#44547) * Core modification When Hyperscan fails, we should not call set_error on the FunctionContext, because set_error will try to cancel the query, whereas we actually want to fall back to re2 in this case. * Some refactoring Rename FunctionRegexp so that we can distinguish regexp match from regexp_extract. * Reproduce ```sql SELECT * FROM regexp_test_chinese WHERE city REGEXP "^上海|^北京" ORDER BY id; ``` Note: the `|` in the SQL above is a Chinese character. --- be/src/vec/functions/function_regexp.cpp | 15 +++++----- be/src/vec/functions/like.cpp | 21 ++++++------- be/src/vec/functions/like.h | 4 +-- .../string_functions/test_regexp_chinese.out | 3 ++ .../string_functions/test_regexp_chinese.groovy | 35 ++++++++++++++++++++++ 5 files changed, 57 insertions(+), 21 deletions(-) diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index ae508120cf9..f740cac273c 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -383,12 +383,13 @@ struct RegexpExtractAllImpl { } }; +// template FunctionRegexpFunctionality is used for regexp_xxxx series functions, not for regexp match. 
template <typename Impl> -class FunctionRegexp : public IFunction { +class FunctionRegexpFunctionality : public IFunction { public: static constexpr auto name = Impl::name; - static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); } + static FunctionPtr create() { return std::make_shared<FunctionRegexpFunctionality>(); } String get_name() const override { return name; } @@ -486,11 +487,11 @@ public: }; void register_function_regexp_extract(SimpleFunctionFactory& factory) { - factory.register_function<FunctionRegexp<RegexpReplaceImpl>>(); - factory.register_function<FunctionRegexp<RegexpExtractImpl<true>>>(); - factory.register_function<FunctionRegexp<RegexpExtractImpl<false>>>(); - factory.register_function<FunctionRegexp<RegexpReplaceOneImpl>>(); - factory.register_function<FunctionRegexp<RegexpExtractAllImpl>>(); + factory.register_function<FunctionRegexpFunctionality<RegexpReplaceImpl>>(); + factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<true>>>(); + factory.register_function<FunctionRegexpFunctionality<RegexpExtractImpl<false>>>(); + factory.register_function<FunctionRegexpFunctionality<RegexpReplaceOneImpl>>(); + factory.register_function<FunctionRegexpFunctionality<RegexpExtractAllImpl>>(); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index 631ebb8dbe6..d727ba6e850 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -486,12 +486,10 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expres if (res != HS_SUCCESS) { *database = nullptr; - if (context) { - context->set_error("hs_compile regex pattern error"); - } - return Status::RuntimeError("hs_compile regex pattern error:" + - std::string(compile_err->message)); + std::string error_message = compile_err->message; hs_free_compile_error(compile_err); + // Do not call FunctionContext::set_error here, since we do not want to cancel the query here. 
+ return Status::RuntimeError<false>("hs_compile regex pattern error:" + error_message); } hs_free_compile_error(compile_err); @@ -499,10 +497,8 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expres hs_free_database(*database); *database = nullptr; *scratch = nullptr; - if (context) { - context->set_error("hs_alloc_scratch allocate scratch space error"); - } - return Status::RuntimeError("hs_alloc_scratch allocate scratch space error"); + // Do not call FunctionContext::set_error here, since we do not want to cancel the query here. + return Status::RuntimeError<false>("hs_alloc_scratch allocate scratch space error"); } return Status::OK(); @@ -934,7 +930,8 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta return Status::OK(); } -Status FunctionRegexp::open(FunctionContext* context, FunctionContext::FunctionStateScope scope) { +Status FunctionRegexpLike::open(FunctionContext* context, + FunctionContext::FunctionStateScope scope) { if (scope != FunctionContext::THREAD_LOCAL) { return Status::OK(); } @@ -1001,8 +998,8 @@ void register_function_like(SimpleFunctionFactory& factory) { } void register_function_regexp(SimpleFunctionFactory& factory) { - factory.register_function<FunctionRegexp>(); - factory.register_alias(FunctionRegexp::name, FunctionRegexp::alias); + factory.register_function<FunctionRegexpLike>(); + factory.register_alias(FunctionRegexpLike::name, FunctionRegexpLike::alias); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h index 229341e7bd1..fed26718603 100644 --- a/be/src/vec/functions/like.h +++ b/be/src/vec/functions/like.h @@ -280,12 +280,12 @@ private: static void remove_escape_character(std::string* search_string); }; -class FunctionRegexp : public FunctionLikeBase { +class FunctionRegexpLike : public FunctionLikeBase { public: static constexpr auto name = "regexp"; static constexpr auto alias = "rlike"; - static FunctionPtr 
create() { return std::make_shared<FunctionRegexp>(); } + static FunctionPtr create() { return std::make_shared<FunctionRegexpLike>(); } String get_name() const override { return name; } diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out b/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out new file mode 100644 index 00000000000..27bb4af4d8a --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_regexp_chinese.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql_regexp -- + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy new file mode 100644 index 00000000000..900a0a04610 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_regexp_chinese.groovy @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_regexp_chinese") { + sql "DROP TABLE IF EXISTS regexp_test_chinese;" + sql """ + CREATE TABLE regexp_test_chinese ( + id int NULL DEFAULT "0", + city varchar(50) NOT NULL DEFAULT "" + ) DISTRIBUTED BY HASH(id) BUCKETS 5 properties("replication_num" = "1"); + """ + + sql """ + INSERT INTO regexp_test_chinese VALUES(1, "上海"),(2, "深圳"),(3, "上海测试"), (4, "北京测试"); + """ + + qt_sql_regexp """ + SELECT * FROM regexp_test_chinese WHERE city REGEXP "^上海|^北京" ORDER BY id; + """ +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org