This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b83744d2f6 [feature](function)add regexp functions: regexp_replace_one, regexp_extract_all (#13766) b83744d2f6 is described below commit b83744d2f6a581e4c03b430e218d7905e7e09373 Author: qiye <jianliang5...@gmail.com> AuthorDate: Wed Nov 2 23:15:57 2022 +0800 [feature](function)add regexp functions: regexp_replace_one, regexp_extract_all (#13766) --- be/src/vec/functions/function_regexp.cpp | 263 +++++++++++++++------ be/test/vec/function/function_like_test.cpp | 61 +++++ .../string-functions/regexp/regexp_extract.md | 2 +- .../string-functions/regexp/regexp_extract_all.md | 61 +++++ .../string-functions/regexp/regexp_replace_one.md | 54 +++++ docs/sidebars.json | 2 + .../string-functions/regexp/regexp_extract_all.md | 59 +++++ .../string-functions/regexp/regexp_replace_one.md | 54 +++++ gensrc/script/doris_builtins_functions.py | 8 + .../test_string_function_regexp.out | 15 ++ .../test_string_function_regexp.groovy | 6 + 11 files changed, 512 insertions(+), 73 deletions(-) diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index c99bb84d31..0fc3681a1e 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -29,92 +29,51 @@ #include "vec/utils/util.hpp" namespace doris::vectorized { -template <typename Impl> -class FunctionRegexp : public IFunction { -public: - static constexpr auto name = Impl::name; - - static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); } - - String get_name() const override { return name; } - - bool use_default_implementation_for_constants() const override { return false; } - - bool use_default_implementation_for_nulls() const override { return false; } - - size_t get_number_of_arguments() const override { return 3; } +struct RegexpReplaceImpl { + static constexpr auto name = "regexp_replace"; - DataTypePtr get_return_type_impl(const DataTypes& arguments) 
const override { - return make_nullable(std::make_shared<DataTypeString>()); - } + static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); + const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); - Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { - if (scope == FunctionContext::THREAD_LOCAL) { - if (context->is_col_constant(1)) { - DCHECK(!context->get_function_state(scope)); - const auto pattern_col = context->get_constant_col(1)->column_ptr; - const auto& pattern = pattern_col->get_data_at(0).to_string_val(); - if (pattern.is_null) { - return Status::OK(); - } + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + re2::RE2* re = reinterpret_cast<re2::RE2*>( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr + if (re == nullptr) { std::string error_str; - re2::RE2* re = - StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + const auto& pattern = pattern_col->get_data_at(i).to_string_val(); + re = StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); if (re == nullptr) { - context->set_error(error_str.c_str()); - return Status::InvalidArgument(error_str); + context->add_warning(error_str.c_str()); + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; } - context->set_function_state(scope, re); + scoped_re.reset(re); } - } - return Status::OK(); - } - Status execute_impl(FunctionContext* context, Block& block, 
const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) override { - size_t argument_size = arguments.size(); - ColumnPtr argument_columns[argument_size]; - auto result_null_map = ColumnUInt8::create(input_rows_count, 0); - auto result_data_column = ColumnString::create(); - - auto& result_data = result_data_column->get_chars(); - auto& result_offset = result_data_column->get_offsets(); - result_offset.resize(input_rows_count); + re2::StringPiece replace_str = + re2::StringPiece(replace_col->get_data_at(i).to_string_view()); - for (int i = 0; i < argument_size; ++i) { - argument_columns[i] = - block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); - if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { - VectorizedUtils::update_null_map(result_null_map->get_data(), - nullable->get_null_map_data()); - argument_columns[i] = nullable->get_nested_column_ptr(); - } + std::string result_str(str_col->get_data_at(i).to_string()); + re2::RE2::GlobalReplace(&result_str, *re, replace_str); + StringOP::push_value_string(result_str, i, result_data, result_offset); } - Impl::execute_impl(context, argument_columns, input_rows_count, result_data, result_offset, - result_null_map->get_data()); - - block.get_by_position(result).column = - ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); - return Status::OK(); - } - - Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { - if (scope == FunctionContext::THREAD_LOCAL) { - if (context->is_col_constant(1)) { - re2::RE2* re = reinterpret_cast<re2::RE2*>(context->get_function_state(scope)); - DCHECK(re); - delete re; - context->set_function_state(scope, nullptr); - } - } return Status::OK(); } }; -struct RegexpReplaceImpl { - static constexpr auto name = "regexp_replace"; +struct RegexpReplaceOneImpl { + static constexpr auto name = "regexp_replace_one"; static Status execute_impl(FunctionContext* 
context, ColumnPtr argument_columns[], size_t input_rows_count, ColumnString::Chars& result_data, @@ -148,7 +107,7 @@ struct RegexpReplaceImpl { re2::StringPiece(replace_col->get_data_at(i).to_string_view()); std::string result_str(str_col->get_data_at(i).to_string()); - re2::RE2::GlobalReplace(&result_str, *re, replace_str); + re2::RE2::Replace(&result_str, *re, replace_str); StringOP::push_value_string(result_str, i, result_data, result_offset); } @@ -215,9 +174,169 @@ struct RegexpExtractImpl { } }; +struct RegexpExtractAllImpl { + static constexpr auto name = "regexp_extract_all"; + + size_t get_number_of_arguments() const { return 2; } + + static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + + re2::RE2* re = reinterpret_cast<re2::RE2*>( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + std::unique_ptr<re2::RE2> scoped_re; + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(i).to_string_val(); + re = StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + if (re == nullptr) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + scoped_re.reset(re); + } + const auto& str = str_col->get_data_at(i); + int max_matches = 1 + re->NumberOfCapturingGroups(); + std::vector<re2::StringPiece> res_matches; + size_t pos = 0; + while (pos < str.size) { + auto str_pos = str.data + pos; + auto str_size = str.size - pos; + re2::StringPiece str_sp 
= re2::StringPiece(str_pos, str_size); + std::vector<re2::StringPiece> matches(max_matches); + bool success = re->Match(str_sp, 0, str_size, re2::RE2::UNANCHORED, &matches[0], + max_matches); + if (!success) { + StringOP::push_empty_string(i, result_data, result_offset); + break; + } + res_matches.push_back(matches[1]); + auto offset = + std::string(str_pos, str_size).find(std::string(matches[0].as_string())); + pos += offset + matches[0].size(); + } + + if (res_matches.empty()) { + continue; + } + + std::string res = "["; + for (int j = 0; j < res_matches.size(); ++j) { + res += "'" + res_matches[j].as_string() + "'"; + if (j < res_matches.size() - 1) { + res += ","; + } + } + res += "]"; + StringOP::push_value_string(std::string_view(res), i, result_data, result_offset); + } + return Status::OK(); + } +}; + +template <typename Impl> +class FunctionRegexp : public IFunction { +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); } + + String get_name() const override { return name; } + + bool use_default_implementation_for_constants() const override { return false; } + + bool use_default_implementation_for_nulls() const override { return false; } + + size_t get_number_of_arguments() const override { + if constexpr (std::is_same_v<Impl, RegexpExtractAllImpl>) { + return 2; + } + return 3; + } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + if (context->is_col_constant(1)) { + DCHECK(!context->get_function_state(scope)); + const auto pattern_col = context->get_constant_col(1)->column_ptr; + const auto& pattern = pattern_col->get_data_at(0).to_string_val(); + if (pattern.is_null) { + return Status::OK(); + } + + std::string error_str; + re2::RE2* re = 
+ StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + if (re == nullptr) { + context->set_error(error_str.c_str()); + return Status::InvalidArgument(error_str); + } + context->set_function_state(scope, re); + } + } + return Status::OK(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + size_t argument_size = arguments.size(); + ColumnPtr argument_columns[argument_size]; + auto result_null_map = ColumnUInt8::create(input_rows_count, 0); + auto result_data_column = ColumnString::create(); + + auto& result_data = result_data_column->get_chars(); + auto& result_offset = result_data_column->get_offsets(); + result_offset.resize(input_rows_count); + + for (int i = 0; i < argument_size; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + VectorizedUtils::update_null_map(result_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + Impl::execute_impl(context, argument_columns, input_rows_count, result_data, result_offset, + result_null_map->get_data()); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); + return Status::OK(); + } + + Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + if (context->is_col_constant(1)) { + re2::RE2* re = reinterpret_cast<re2::RE2*>(context->get_function_state(scope)); + DCHECK(re); + delete re; + context->set_function_state(scope, nullptr); + } + } + return Status::OK(); + } +}; + void register_function_regexp_extract(SimpleFunctionFactory& factory) { factory.register_function<FunctionRegexp<RegexpReplaceImpl>>(); 
factory.register_function<FunctionRegexp<RegexpExtractImpl>>(); + factory.register_function<FunctionRegexp<RegexpReplaceOneImpl>>(); + factory.register_function<FunctionRegexp<RegexpExtractAllImpl>>(); } } // namespace doris::vectorized diff --git a/be/test/vec/function/function_like_test.cpp b/be/test/vec/function/function_like_test.cpp index 2c8299e274..2162c4b99e 100644 --- a/be/test/vec/function/function_like_test.cpp +++ b/be/test/vec/function/function_like_test.cpp @@ -137,6 +137,37 @@ TEST(FunctionLikeTest, regexp_extract) { } } +TEST(FunctionLikeTest, regexp_extract_all) { + std::string func_name = "regexp_extract_all"; + + DataSet data_set = { + {{std::string("x=a3&x=18abc&x=2&y=3&x=4&x=17bcd"), std::string("x=([0-9]+)([a-z]+)")}, + std::string("['18','17']")}, + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), std::string("^x=([a-z]+)([0-9]+)")}, + std::string("['a']")}, + {{std::string("http://a.m.baidu.com/i41915173660.htm"), std::string("i([0-9]+)")}, + std::string("['41915173660']")}, + {{std::string("http://a.m.baidu.com/i41915i73660.htm"), std::string("i([0-9]+)")}, + std::string("['41915','73660']")}, + + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)")}, std::string("['i']")}, + {{std::string("hitdecisioendlist"), std::string("(i)(.*?)(e)")}, + std::string("['i','i']")}, + {{std::string("hitdecisioendliset"), std::string("(i)(.*?)(e)")}, + std::string("['i','i','i']")}, + // null + {{std::string("abc"), Null()}, Null()}, + {{Null(), std::string("i([0-9]+)")}, Null()}}; + + // pattern is constant value + InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted {TypeIndex::String}}; + for (const auto& line : data_set) { + DataSet const_pattern_dataset = {line}; + check_function<DataTypeString, true>(func_name, const_pattern_input_types, + const_pattern_dataset); + } +} + TEST(FunctionLikeTest, regexp_replace) { std::string func_name = "regexp_replace"; @@ -167,4 +198,34 @@ TEST(FunctionLikeTest, regexp_replace) { } } 
+TEST(FunctionLikeTest, regexp_replace_one) { + std::string func_name = "regexp_replace_one"; + + DataSet data_set = { + {{std::string("2022-03-02"), std::string("-"), std::string("")}, + std::string("202203-02")}, + {{std::string("2022-03-02"), std::string(""), std::string("s")}, + std::string("s2022-03-02")}, + {{std::string("100-200"), std::string("(\\d+)"), std::string("doris")}, + std::string("doris-200")}, + + {{std::string("a b c"), std::string(" "), std::string("-")}, std::string("a-b c")}, + {{std::string("a b c"), std::string("(b)"), std::string("<\\1>")}, + std::string("a <b> c")}, + {{std::string("qwewe"), std::string(""), std::string("true")}, + std::string("trueqwewe")}, + // null + {{std::string("abc"), std::string("x=18abc"), Null()}, Null()}, + {{Null(), std::string("i([0-9]+)"), std::string("x=18abc")}, Null()}}; + + // pattern is constant value + InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted {TypeIndex::String}, + TypeIndex::String}; + for (const auto& line : data_set) { + DataSet const_pattern_dataset = {line}; + check_function<DataTypeString, true>(func_name, const_pattern_input_types, + const_pattern_dataset); + } +} + } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md index e2d59ee446..ffc4d5423d 100644 --- a/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md @@ -28,7 +28,7 @@ under the License. ### Description #### Syntax -'VARCHAR regexp 'extract (VARCHAR str, VARCHAR pattern, int pos) +`VARCHAR regexp_extract (VARCHAR str, VARCHAR pattern, int pos)` The string STR is matched regularly and the POS matching part which conforms to pattern is extracted. Patterns need to match exactly some part of the STR to return to the matching part of the pattern. 
If there is no match, return an empty string. diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md new file mode 100644 index 0000000000..c63ea34074 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md @@ -0,0 +1,61 @@ +--- +{ + "title": "regexp_extract_all", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## regexp_extract_all +### Description +#### Syntax + +`VARCHAR regexp_extract_all (VARCHAR str, VARCHAR pattern)` + +Performs a regular-expression match on the string str and, for every non-overlapping match of pattern, extracts the part matched by the first sub-pattern (capture group). The pattern must match some part of str; the extracted parts are returned as a string formatted like an array, e.g. ['b','f']. If there is no match, an empty string is returned.
+ +### example + +``` +mysql> SELECT regexp_extract_all('AbCdE', '([[:lower:]]+)C([[:lower:]]+)'); ++--------------------------------------------------------------+ +| regexp_extract_all('AbCdE', '([[:lower:]]+)C([[:lower:]]+)') | ++--------------------------------------------------------------+ +| ['b'] | ++--------------------------------------------------------------+ + +mysql> SELECT regexp_extract_all('AbCdEfCg', '([[:lower:]]+)C([[:lower:]]+)'); ++-----------------------------------------------------------------+ +| regexp_extract_all('AbCdEfCg', '([[:lower:]]+)C([[:lower:]]+)') | ++-----------------------------------------------------------------+ +| ['b','f'] | ++-----------------------------------------------------------------+ + +mysql> SELECT regexp_extract_all('abc=111, def=222, ghi=333','("[^"]+"|\\w+)=("[^"]+"|\\w+)'); ++--------------------------------------------------------------------------------+ +| regexp_extract_all('abc=111, def=222, ghi=333', '("[^"]+"|\w+)=("[^"]+"|\w+)') | ++--------------------------------------------------------------------------------+ +| ['abc','def','ghi'] | ++--------------------------------------------------------------------------------+ +``` + +### keywords + REGEXP_EXTRACT_ALL,REGEXP,EXTRACT,ALL diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md new file mode 100644 index 0000000000..9bc7347955 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md @@ -0,0 +1,54 @@ +--- +{ + "title": "regexp_replace_one", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## regexp_replace_one +### description +#### Syntax + +`VARCHAR regexp_replace_one(VARCHAR str, VARCHAR pattern, VARCHAR repl)` + + +Performs a regular-expression match on the string str and replaces the part that matches pattern with repl; only the first match is replaced. + +### example + +``` +mysql> SELECT regexp_replace_one('a b c', " ", "-"); ++-----------------------------------+ +| regexp_replace_one('a b c', ' ', '-') | ++-----------------------------------+ +| a-b c | ++-----------------------------------+ + +mysql> SELECT regexp_replace_one('a b b','(b)','<\\1>'); ++----------------------------------------+ +| regexp_replace_one('a b b', '(b)', '<\1>') | ++----------------------------------------+ +| a <b> b | ++----------------------------------------+ +``` +### keywords + REGEXP_REPLACE_ONE,REGEXP,REPLACE,ONE diff --git a/docs/sidebars.json b/docs/sidebars.json index ebf2da3ec4..1f91df2d29 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -413,7 +413,9 @@ "items": [ "sql-manual/sql-functions/string-functions/regexp/regexp", "sql-manual/sql-functions/string-functions/regexp/regexp_extract", + "sql-manual/sql-functions/string-functions/regexp/regexp_extract_all", "sql-manual/sql-functions/string-functions/regexp/regexp_replace", + "sql-manual/sql-functions/string-functions/regexp/regexp_replace_one", "sql-manual/sql-functions/string-functions/regexp/not_regexp" ] } diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md new file mode 100644 index 0000000000..e0b63c4c5a --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md @@ -0,0 +1,59 @@ +--- +{ + "title": "regexp_extract_all", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+--> + +## regexp_extract_all +### description +#### Syntax + +`VARCHAR regexp_extract_all(VARCHAR str, VARCHAR pattern)` + +对字符串 str 进行正则匹配,抽取符合 pattern 的第一个子模式匹配部分。需要 pattern 完全匹配 str 中的某部分,这样才能返回 pattern 部分中需匹配部分的字符串数组。如果没有匹配,返回空字符串。 + +### example + +``` +mysql> SELECT regexp_extract_all('AbCdE', '([[:lower:]]+)C([[:lower:]]+)'); ++--------------------------------------------------------------+ +| regexp_extract_all('AbCdE', '([[:lower:]]+)C([[:lower:]]+)') | ++--------------------------------------------------------------+ +| ['b'] | ++--------------------------------------------------------------+ + +mysql> SELECT regexp_extract_all('AbCdEfCg', '([[:lower:]]+)C([[:lower:]]+)'); ++-----------------------------------------------------------------+ +| regexp_extract_all('AbCdEfCg', '([[:lower:]]+)C([[:lower:]]+)') | ++-----------------------------------------------------------------+ +| ['b','f'] | ++-----------------------------------------------------------------+ + +mysql> SELECT regexp_extract_all('abc=111, def=222, ghi=333','("[^"]+"|\\w+)=("[^"]+"|\\w+)'); ++--------------------------------------------------------------------------------+ +| regexp_extract_all('abc=111, def=222, ghi=333', '("[^"]+"|\w+)=("[^"]+"|\w+)') | ++--------------------------------------------------------------------------------+ +| ['abc','def','ghi'] | ++--------------------------------------------------------------------------------+ +``` + +### keywords + REGEXP_EXTRACT_ALL,REGEXP,EXTRACT,ALL diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md new file mode 100644 index 0000000000..0910f49b9a --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_replace_one.md @@ -0,0 +1,54 @@ +--- +{ + "title": "regexp_replace_one", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) 
under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## regexp_replace_one +### description +#### Syntax + +`VARCHAR regexp_replace_one(VARCHAR str, VARCHAR pattern, VARCHAR repl)` + + +对字符串 str 进行正则匹配, 将命中 pattern 的部分使用 repl 来进行替换,仅替换第一个匹配项。 + +### example + +``` +mysql> SELECT regexp_replace_one('a b c', " ", "-"); ++-----------------------------------+ +| regexp_replace_one('a b c', ' ', '-') | ++-----------------------------------+ +| a-b c | ++-----------------------------------+ + +mysql> SELECT regexp_replace_one('a b b','(b)','<\\1>'); ++----------------------------------------+ +| regexp_replace_one('a b b', '(b)', '<\1>') | ++----------------------------------------+ +| a <b> b | ++----------------------------------------+ +``` +### keywords + REGEXP_REPLACE_ONE,REGEXP,REPLACE,ONE diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 0c3a238761..940ee6b2f3 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2146,6 +2146,10 @@ visible_functions = [ '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], + [['regexp_replace_one'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR'], 
+ '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['regexp_extract_all'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], + '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['concat'], 'VARCHAR', ['VARCHAR', '...'], '_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], @@ -2282,6 +2286,10 @@ visible_functions = [ '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], + [['regexp_replace_one'], 'STRING', ['STRING', 'STRING', 'STRING'], + '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['regexp_extract_all'], 'STRING', ['STRING', 'STRING'], + '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['concat'], 'STRING', ['STRING', '...'], '_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out index e9a2b4573a..f5dcf7d0f4 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out @@ -21,12 +21,27 @@ b -- !sql -- d +-- !sql -- +['18','17'] + +-- !sql -- +['41915','73660'] + +-- !sql -- +['abc','def','ghi'] + -- !sql -- a-b-c -- !sql -- a <b> c +-- !sql -- +a-b c + +-- !sql -- +a <b> b + -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index 38ee437e48..1ccdf7196f 100644 --- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -44,10 +44,16 @@ suite("test_string_function_regexp") { qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1);" qt_sql "SELECT regexp_extract('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 2);" + qt_sql "SELECT regexp_extract_all('x=a3&x=18abc&x=2&y=3&x=4&x=17bcd', 'x=([0-9]+)([a-z]+)');" + qt_sql "SELECT regexp_extract_all('http://a.m.baidu.com/i41915i73660.htm', 'i([0-9]+)');" + qt_sql "SELECT regexp_extract_all('abc=111, def=222, ghi=333', '(\"[^\"]+\"|\\\\w+)=(\"[^\"]+\"|\\\\w+)');" qt_sql "SELECT regexp_replace('a b c', \" \", \"-\");" qt_sql "SELECT regexp_replace('a b c','(b)','<\\\\1>');" + qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");" + qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');" + sql "DROP TABLE ${tbName};" def tableName= "test" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org