This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 41b165cbf10b39ec41f63958e2332aa1d5b8fc5d Author: Pxl <952130...@qq.com> AuthorDate: Thu Jan 6 18:57:05 2022 +0800 [Vectorized][Feature] fix core dump when using function override and function alias at the same time && support substr(str,int) override (#7640) --- be/src/vec/functions/function_string.cpp | 19 +++-- be/src/vec/functions/function_string.h | 109 ++++++++++++++++++++----- be/src/vec/functions/function_timestamp.cpp | 11 +-- be/src/vec/functions/simple_function_factory.h | 10 ++- gensrc/script/doris_builtins_functions.py | 4 +- 5 files changed, 111 insertions(+), 42 deletions(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 73e2413..34f1c6b 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -293,7 +293,7 @@ struct HexStringImpl { dst_data_ptr++; offset++; } else { - VStringFunctions::hex_encode(source, srclen, reinterpret_cast<char *>(dst_data_ptr)); + VStringFunctions::hex_encode(source, srclen, reinterpret_cast<char*>(dst_data_ptr)); dst_data_ptr[srclen * 2] = '\0'; dst_data_ptr += (srclen * 2 + 1); offset += (srclen * 2 + 1); @@ -513,9 +513,9 @@ struct AesEncryptImpl { int cipher_len = l_size + 16; char p[cipher_len]; - int outlen = - EncryptionUtil::encrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, - (unsigned char*)r_raw, r_size, NULL, true, (unsigned char*)p); + int outlen = EncryptionUtil::encrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, + (unsigned char*)r_raw, r_size, NULL, true, + (unsigned char*)p); if (outlen < 0) { StringOP::push_null_string(i, res_data, res_offsets, null_map_data); } else { @@ -553,9 +553,9 @@ struct AesDecryptImpl { int cipher_len = l_size; char p[cipher_len]; - int outlen = - EncryptionUtil::decrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, - (unsigned char*)r_raw, r_size, NULL, true, (unsigned char*)p); + int outlen = EncryptionUtil::decrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, + (unsigned char*)r_raw, r_size, NULL, true, + (unsigned char*)p); if (outlen < 0) { StringOP::push_null_string(i, res_data, res_offsets, null_map_data); } else { @@ -774,7 +774,8 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionLTrim>(); factory.register_function<FunctionRTrim>(); factory.register_function<FunctionTrim>(); - factory.register_function<FunctionSubstring>(); + factory.register_function<FunctionSubstring<Substr3Imp>>(); + factory.register_function<FunctionSubstring<Substr2Imp>>(); factory.register_function<FunctionLeft>(); factory.register_function<FunctionRight>(); factory.register_function<FunctionNullOrEmpty>(); @@ -794,7 +795,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_alias(FunctionLeft::name, "strleft"); factory.register_alias(FunctionRight::name, "strright"); - factory.register_alias(FunctionSubstring::name, "substr"); + factory.register_alias(SubstringUtil::name, "substr"); factory.register_alias(FunctionToLower::name, "lcase"); factory.register_alias(FunctionStringMd5sum::name, "md5"); } diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index efeef41..3f3e538 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -88,25 +88,9 @@ struct StringOP { } }; -class FunctionSubstring : public IFunction { -public: +struct SubstringUtil { static constexpr auto name = "substring"; - static FunctionPtr create() { return std::make_shared<FunctionSubstring>(); } - String get_name() const override { return name; } - size_t get_number_of_arguments() const override { return 3; } - - DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return make_nullable(std::make_shared<DataTypeString>()); - } - - bool use_default_implementation_for_nulls() const override { return false; } - bool use_default_implementation_for_constants() const override { return true; } - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) override { - substring_execute(block, arguments, result, input_rows_count); - return Status::OK(); - } static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 3); @@ -201,12 +185,89 @@ private: } }; -class FunctionLeft : public FunctionSubstring { +template <typename Impl> +class FunctionSubstring : public IFunction { +public: + static constexpr auto name = SubstringUtil::name; + String get_name() const override { return name; } + static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + size_t get_number_of_arguments() const override { + return get_variadic_argument_types_impl().size(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + return Impl::execute_impl(context, block, arguments, result, input_rows_count); + } +}; + +struct Substr3Imp { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(), + std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + SubstringUtil::substring_execute(block, arguments, result, input_rows_count); + return Status::OK(); + } +}; + +struct Substr2Imp { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto params = ColumnInt32::create(input_rows_count); + auto& strlen_data = params->get_data(); + + auto str_col = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { + str_col = nullable->get_nested_column_ptr(); + } + auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); + + for (int i = 0; i < input_rows_count; ++i) { + strlen_data[i] = str_offset[i] - str_offset[i - 1]; + } + + block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"}); + + ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; + + SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); + return Status::OK(); + } +}; + +class FunctionLeft : public IFunction { public: static constexpr auto name = "left"; static FunctionPtr create() { return std::make_shared<FunctionLeft>(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 2; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { @@ -219,17 +280,23 @@ public: temp_arguments[0] = arguments[0]; temp_arguments[1] = num_columns_without_result; temp_arguments[2] = arguments[1]; - FunctionSubstring::substring_execute(block, temp_arguments, result, input_rows_count); + SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); return Status::OK(); } }; -class FunctionRight : public FunctionSubstring { +class FunctionRight : public IFunction { public: static constexpr auto name = "right"; static FunctionPtr create() { return std::make_shared<FunctionRight>(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 2; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { @@ -275,7 +342,7 @@ public: temp_arguments[0] = arguments[0]; temp_arguments[1] = num_columns_without_result; temp_arguments[2] = num_columns_without_result + 1; - FunctionSubstring::substring_execute(block, temp_arguments, result, input_rows_count); + SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); return Status::OK(); } }; diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index c7335c4..f5216e9 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -177,9 +177,7 @@ struct UnixTimeStampImpl { }; struct UnixTimeStampDateImpl { - static DataTypes get_variadic_argument_types() { - return {std::make_shared<vectorized::DataTypeDate>()}; - } + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeDate>()}; } static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { return make_nullable(std::make_shared<DataTypeInt32>()); @@ -226,14 +224,13 @@ struct UnixTimeStampDateImpl { struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl { static DataTypes get_variadic_argument_types() { - return {std::make_shared<vectorized::DataTypeDateTime>()}; + return {std::make_shared<DataTypeDateTime>()}; } }; struct UnixTimeStampStrImpl { static DataTypes get_variadic_argument_types() { - return {std::make_shared<vectorized::DataTypeString>(), - std::make_shared<vectorized::DataTypeString>()}; + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; } static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) { @@ -294,8 +291,6 @@ public: String get_name() const override { return name; } - bool is_variadic() const override { return true; } - bool use_default_implementation_for_nulls() const override { return false; } size_t get_number_of_arguments() const override { diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 149ce88..c016d35 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -95,14 +95,19 @@ public: } void register_alias(const std::string& name, const std::string& alias) { - function_creators[alias] = function_creators[name]; + function_alias[alias] = name; } FunctionBasePtr get_function(const std::string& name, const ColumnsWithTypeAndName& arguments, const DataTypePtr& return_type) { std::string key_str = name; + + if (function_alias.count(name)) { + key_str = function_alias[name]; + } + // if function is variadic, added types_str as key - if (function_variadic_set.count(name)) { + if (function_variadic_set.count(key_str)) { for (auto& arg : arguments) { key_str.append(arg.type->is_nullable() ? reinterpret_cast<const DataTypeNullable*>(arg.type.get()) @@ -123,6 +128,7 @@ public: private: FunctionCreators function_creators; FunctionIsVariadic function_variadic_set; + std::unordered_map<std::string, std::string> function_alias; template <typename Function> static FunctionBuilderPtr createDefaultFunction() { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 7bf7d2d..5027627 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -828,7 +828,7 @@ visible_functions = [ # String builtin functions [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'], '_ZN5doris15StringFunctions9substringEPN' - '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', '', 'ALWAYS_NULLABLE'], + '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'], [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT', 'INT'], '_ZN5doris15StringFunctions9substringEPN' '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], @@ -950,7 +950,7 @@ visible_functions = [ # Longtext function [['substr', 'substring'], 'STRING', ['STRING', 'INT'], '_ZN5doris15StringFunctions9substringEPN' - '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', '', 'ALWAYS_NULLABLE'], + '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'], [['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'], '_ZN5doris15StringFunctions9substringEPN' '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org