This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 454b45b [feature](vectorize)(function) support regexp&&sm4&&aes functions (#8307) 454b45b is described below commit 454b45bea3cb5d0c6eb0c2391465a07a19aceed4 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Tue Mar 8 13:14:02 2022 +0800 [feature](vectorize)(function) support regexp&&sm4&&aes functions (#8307) --- be/src/exprs/encryption_functions.cpp | 21 -- be/src/exprs/encryption_functions.h | 21 +- be/src/exprs/string_functions.cpp | 4 +- be/src/exprs/string_functions.h | 3 + be/src/vec/CMakeLists.txt | 3 + .../aggregate_function_percentile_approx.cpp | 1 - be/src/vec/functions/function_encryption.cpp | 245 +++++++++++++++++++++ be/src/vec/functions/function_hex.cpp | 183 +++++++++++++++ be/src/vec/functions/function_regexp.cpp | 216 ++++++++++++++++++ be/src/vec/functions/function_string.cpp | 132 +---------- be/src/vec/functions/function_string.h | 21 +- be/src/vec/functions/math.cpp | 51 ----- be/src/vec/functions/simple_function_factory.h | 6 + be/test/vec/function/function_like_test.cpp | 59 +++++ be/test/vec/function/function_string_test.cpp | 241 +++++++++++++++++++- gensrc/script/doris_builtins_functions.py | 74 +++---- 16 files changed, 1034 insertions(+), 247 deletions(-) diff --git a/be/src/exprs/encryption_functions.cpp b/be/src/exprs/encryption_functions.cpp index 19ec1a7..a4d93b7 100644 --- a/be/src/exprs/encryption_functions.cpp +++ b/be/src/exprs/encryption_functions.cpp @@ -22,34 +22,13 @@ #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/debug_util.h" -#include "util/encryption_util.h" #include "util/md5.h" #include "util/sm3.h" -#include "util/string_util.h" #include "util/url_coding.h" namespace doris { void EncryptionFunctions::init() {} -StringCaseUnorderedMap<EncryptionMode> aes_mode_map { - {"AES_128_ECB", AES_128_ECB}, {"AES_192_ECB", AES_192_ECB}, - {"AES_256_ECB", AES_256_ECB}, {"AES_128_CBC", AES_128_CBC}, - {"AES_192_CBC", AES_192_CBC}, {"AES_256_CBC", AES_256_CBC}, - {"AES_128_CFB", AES_128_CFB}, {"AES_192_CFB", AES_192_CFB}, - {"AES_256_CFB", AES_256_CFB}, {"AES_128_CFB1", AES_128_CFB1}, - {"AES_192_CFB1", AES_192_CFB1}, {"AES_256_CFB1", AES_256_CFB1}, - {"AES_128_CFB8", AES_128_CFB8}, {"AES_192_CFB8", AES_192_CFB8}, - {"AES_256_CFB8", AES_256_CFB8}, {"AES_128_CFB128", AES_128_CFB128}, - {"AES_192_CFB128", AES_192_CFB128}, {"AES_256_CFB128", AES_256_CFB128}, - {"AES_128_CTR", AES_128_CTR}, {"AES_192_CTR", AES_192_CTR}, - {"AES_256_CTR", AES_256_CTR}, {"AES_128_OFB", AES_128_OFB}, - {"AES_192_OFB", AES_192_OFB}, {"AES_256_OFB", AES_256_OFB}}; -StringCaseUnorderedMap<EncryptionMode> sm4_mode_map {{"SM4_128_ECB", SM4_128_ECB}, - {"SM4_128_CBC", SM4_128_CBC}, - {"SM4_128_CFB128", SM4_128_CFB128}, - {"SM4_128_OFB", SM4_128_OFB}, - {"SM4_128_CTR", SM4_128_CTR}}; - StringVal encrypt(FunctionContext* ctx, const StringVal& src, const StringVal& key, const StringVal& iv, EncryptionMode mode) { if (src.len == 0 || src.is_null) { diff --git a/be/src/exprs/encryption_functions.h b/be/src/exprs/encryption_functions.h index 8a7e45d..0046da8 100644 --- a/be/src/exprs/encryption_functions.h +++ b/be/src/exprs/encryption_functions.h @@ -22,13 +22,32 @@ #include "udf/udf.h" #include "udf/udf_internal.h" +#include "util/encryption_util.h" +#include "util/string_util.h" namespace doris { class Expr; struct ExprValue; class TupleRow; - +static StringCaseUnorderedMap<EncryptionMode> aes_mode_map { + {"AES_128_ECB", AES_128_ECB}, {"AES_192_ECB", AES_192_ECB}, + {"AES_256_ECB", AES_256_ECB}, {"AES_128_CBC", AES_128_CBC}, + {"AES_192_CBC", AES_192_CBC}, {"AES_256_CBC", AES_256_CBC}, + {"AES_128_CFB", AES_128_CFB}, {"AES_192_CFB", AES_192_CFB}, + {"AES_256_CFB", AES_256_CFB}, {"AES_128_CFB1", AES_128_CFB1}, + {"AES_192_CFB1", AES_192_CFB1}, {"AES_256_CFB1", AES_256_CFB1}, + {"AES_128_CFB8", AES_128_CFB8}, {"AES_192_CFB8", AES_192_CFB8}, + {"AES_256_CFB8", AES_256_CFB8}, {"AES_128_CFB128", AES_128_CFB128}, + {"AES_192_CFB128", AES_192_CFB128}, {"AES_256_CFB128", AES_256_CFB128}, + {"AES_128_CTR", AES_128_CTR}, {"AES_192_CTR", AES_192_CTR}, + {"AES_256_CTR", AES_256_CTR}, {"AES_128_OFB", AES_128_OFB}, + {"AES_192_OFB", AES_192_OFB}, {"AES_256_OFB", AES_256_OFB}}; +static StringCaseUnorderedMap<EncryptionMode> sm4_mode_map {{"SM4_128_ECB", SM4_128_ECB}, + {"SM4_128_CBC", SM4_128_CBC}, + {"SM4_128_CFB128", SM4_128_CFB128}, + {"SM4_128_OFB", SM4_128_OFB}, + {"SM4_128_CTR", SM4_128_CTR}}; class EncryptionFunctions { public: static void init(); diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 00f2643..825762c 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -488,7 +488,7 @@ bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::str } // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. -static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, +re2::RE2* StringFunctions::compile_regex(const StringVal& pattern, std::string* error_str, const StringVal& match_parameter) { re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.ptr), pattern.len); re2::RE2::Options options; @@ -512,7 +512,7 @@ static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, return nullptr; } return re; -} +} void StringFunctions::regexp_prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) { diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index af13f4f..dc14906 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -184,6 +184,9 @@ public: static doris_udf::IntVal bit_length(doris_udf::FunctionContext* context, const doris_udf::StringVal& str); + // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. + static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, + const StringVal& match_parameter); }; } // namespace doris diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 91f65f9..8365714 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -117,6 +117,9 @@ set(VEC_FILES functions/function_case.cpp functions/function_cast.cpp functions/function_conv.cpp + functions/function_encryption.cpp + functions/function_regexp.cpp + functions/function_hex.cpp functions/function_string.cpp functions/function_timestamp.cpp functions/function_utility.cpp diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp index 7179a65..976565f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp @@ -28,7 +28,6 @@ AggregateFunctionPtr create_aggregate_function_percentile_approx(const std::stri const DataTypes& argument_types, const Array& parameters, const bool result_is_nullable) { - LOG(INFO) << "percentile_approx function size is: " << argument_types.size(); if (argument_types.size() == 1) { return std::make_shared<AggregateFunctionPercentileApproxMerge>(argument_types); diff --git a/be/src/vec/functions/function_encryption.cpp b/be/src/vec/functions/function_encryption.cpp new file mode 100644 index 0000000..3c84d44 --- /dev/null +++ b/be/src/vec/functions/function_encryption.cpp @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/encryption_functions.h" +#include "runtime/string_search.hpp" +#include "util/encryption_util.h" +#include "util/string_util.h" +#include "vec/common/pod_array_fwd.h" +#include "vec/functions/function_string.h" +#include "vec/functions/function_string_to_string.h" +#include "vec/functions/function_totype.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +template <typename Impl, typename FunctionName> +class FunctionEncryptionAndDecrypt : public IFunction { +public: + static constexpr auto name = FunctionName::name; + + String get_name() const override { return name; } + + static FunctionPtr create() { return std::make_shared<FunctionEncryptionAndDecrypt>(); } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types_impl(); + } + + size_t get_number_of_arguments() const override { + return get_variadic_argument_types_impl().size(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + size_t argument_size = arguments.size(); + ColumnPtr argument_columns[argument_size]; + std::vector<const ColumnString::Offsets*> offsets_list(argument_size); + std::vector<const ColumnString::Chars*> chars_list(argument_size); + + auto result_null_map = ColumnUInt8::create(input_rows_count, 0); + auto result_data_column = ColumnString::create(); + + auto& result_data = result_data_column->get_chars(); + auto& result_offset = result_data_column->get_offsets(); + result_offset.resize(input_rows_count); + + for (int i = 0; i < argument_size; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + VectorizedUtils::update_null_map(result_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + for (size_t i = 0; i < argument_size; ++i) { + auto col_str = assert_cast<const ColumnString*>(argument_columns[i].get()); + offsets_list[i] = &col_str->get_offsets(); + chars_list[i] = &col_str->get_chars(); + } + + Impl::vector_vector(offsets_list, chars_list, input_rows_count, result_data, result_offset, + result_null_map->get_data()); + block.get_by_position(result).column = + ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); + return Status::OK(); + } +}; + +template <typename Impl, bool is_encrypt> +static void exectue_result(std::vector<const ColumnString::Offsets*>& offsets_list, + std::vector<const ColumnString::Chars*>& chars_list, size_t i, + EncryptionMode& encryption_mode, const char* iv_raw, + ColumnString::Chars& result_data, ColumnString::Offsets& result_offset, + NullMap& null_map) { + int src_size = (*offsets_list[0])[i] - (*offsets_list[0])[i - 1] - 1; + const auto src_raw = + reinterpret_cast<const char*>(&(*chars_list[0])[(*offsets_list[0])[i - 1]]); + int key_size = (*offsets_list[1])[i] - (*offsets_list[1])[i - 1] - 1; + const auto key_raw = + reinterpret_cast<const char*>(&(*chars_list[1])[(*offsets_list[1])[i - 1]]); + if (*src_raw == '\0' || src_size == 0) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + return; + } + int cipher_len = src_size; + if constexpr (is_encrypt) { + cipher_len += 16; + } + std::unique_ptr<char[]> p; + p.reset(new char[cipher_len]); + int ret_code = 0; + + ret_code = Impl::exectue_impl(encryption_mode, (unsigned char*)src_raw, src_size, + (unsigned char*)key_raw, key_size, iv_raw, true, + (unsigned char*)p.get()); + + if (ret_code < 0) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + } else { + StringOP::push_value_string(std::string_view(p.get(), ret_code), i, result_data, + result_offset); + } +} + +template <typename Impl, EncryptionMode mode, bool is_encrypt> +struct EncryptionAndDecryptTwoImpl { + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; + } + + static Status vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list, + std::vector<const ColumnString::Chars*>& chars_list, + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + EncryptionMode encryption_mode = mode; + exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode, nullptr, + result_data, result_offset, null_map); + } + return Status::OK(); + } +}; + +template <typename Impl, EncryptionMode mode, bool is_encrypt, bool is_sm_mode> +struct EncryptionAndDecryptFourImpl { + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), + std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; + } + + static Status vector_vector(std::vector<const ColumnString::Offsets*>& offsets_list, + std::vector<const ColumnString::Chars*>& chars_list, + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + + EncryptionMode encryption_mode = mode; + int mode_size = (*offsets_list[3])[i] - (*offsets_list[3])[i - 1] - 1; + const auto mode_raw = + reinterpret_cast<const char*>(&(*chars_list[3])[(*offsets_list[3])[i - 1]]); + const auto iv_raw = + reinterpret_cast<const char*>(&(*chars_list[2])[(*offsets_list[2])[i - 1]]); + if (*mode_raw != '\0' || mode_size != 0) { + std::string mode_str(mode_raw, mode_size); + if constexpr (is_sm_mode) { + if (sm4_mode_map.count(mode_str) == 0) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + encryption_mode = sm4_mode_map.at(mode_str); + } else { + if (aes_mode_map.count(mode_str) == 0) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + encryption_mode = aes_mode_map.at(mode_str); + } + } + + exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i, encryption_mode, iv_raw, + result_data, result_offset, null_map); + } + return Status::OK(); + } +}; + +struct EncryptImpl { + static int exectue_impl(EncryptionMode mode, const unsigned char* source, + uint32_t source_length, const unsigned char* key, uint32_t key_length, + const char* iv, bool padding, unsigned char* encrypt) { + return EncryptionUtil::encrypt(mode, source, source_length, key, key_length, iv, true, + encrypt); + } +}; + +struct DecryptImpl { + static int exectue_impl(EncryptionMode mode, const unsigned char* source, + uint32_t source_length, const unsigned char* key, uint32_t key_length, + const char* iv, bool padding, unsigned char* encrypt) { + return EncryptionUtil::decrypt(mode, source, source_length, key, key_length, iv, true, + encrypt); + } +}; + +struct SM4EncryptName { + static constexpr auto name = "sm4_encrypt"; +}; + +struct SM4DecryptName { + static constexpr auto name = "sm4_decrypt"; +}; + +struct AESEncryptName { + static constexpr auto name = "aes_encrypt"; +}; + +struct AESDecryptName { + static constexpr auto name = "aes_decrypt"; +}; + +void register_function_encryption(SimpleFunctionFactory& factory) { + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<EncryptImpl, SM4_128_ECB, true>, SM4EncryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<DecryptImpl, SM4_128_ECB, false>, SM4DecryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<EncryptImpl, AES_128_ECB, true>, AESEncryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<DecryptImpl, AES_128_ECB, false>, AESDecryptName>>(); + + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<EncryptImpl, SM4_128_ECB, true, true>, SM4EncryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<DecryptImpl, SM4_128_ECB, false, true>, SM4DecryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<EncryptImpl, AES_128_ECB, true, false>, AESEncryptName>>(); + factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<DecryptImpl, AES_128_ECB, false, false>, AESDecryptName>>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_hex.cpp b/be/src/vec/functions/function_hex.cpp new file mode 100644 index 0000000..2e26104 --- /dev/null +++ b/be/src/vec/functions/function_hex.cpp @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include "vec/columns/column_complex.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_hll.h" +#include "vec/functions/function_const.h" +#include "vec/functions/function_string.h" +#include "vec/functions/function_totype.h" +#include "vec/functions/simple_function_factory.h" +#include "util/simd/vstring_function.h" //place this header file at last to compile + +namespace doris::vectorized { +template <typename Impl> +class FunctionHexVariadic : public IFunction { +public: + static constexpr auto name = "hex"; + + static FunctionPtr create() { return std::make_shared<FunctionHexVariadic>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + ColumnPtr argument_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + + auto result_data_column = ColumnString::create(); + auto& result_data = result_data_column->get_chars(); + auto& result_offset = result_data_column->get_offsets(); + + Impl::vector(argument_column, input_rows_count, result_data, result_offset); + block.replace_by_position(result, std::move(result_data_column)); + return Status::OK(); + } +}; + +static void hex_encode(const unsigned char* source, size_t srclen, unsigned char*& dst_data_ptr, + size_t& offset) { + if (srclen == 0) { + DCHECK(*source == '\0'); + *dst_data_ptr = '\0'; + dst_data_ptr++; + offset++; + } else { + doris::simd::VStringFunctions::hex_encode(source, srclen, + reinterpret_cast<char*>(dst_data_ptr)); + dst_data_ptr[srclen * 2] = '\0'; + dst_data_ptr += (srclen * 2 + 1); + offset += (srclen * 2 + 1); + } +} + +struct HexStringImpl { + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } + + static Status vector(ColumnPtr argument_column, size_t input_rows_count, + ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { + const auto* str_col = check_and_get_column<ColumnString>(argument_column.get()); + auto& data = str_col->get_chars(); + auto& offsets = str_col->get_offsets(); + dst_offsets.resize(input_rows_count); + dst_data.resize(data.size() * 2); + + size_t offset = 0; + auto dst_data_ptr = dst_data.data(); + for (int i = 0; i < input_rows_count; ++i) { + auto source = reinterpret_cast<const unsigned char*>(&data[offsets[i - 1]]); + size_t srclen = offsets[i] - offsets[i - 1] - 1; + hex_encode(source, srclen, dst_data_ptr, offset); + dst_offsets[i] = offset; + } + return Status::OK(); + } +}; + +struct HexIntImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<vectorized::DataTypeInt64>()}; + } + + static std::string_view hex(uint64_t num, char* ans) { + static constexpr auto hex_table = "0123456789ABCDEF"; + // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F' + if (num == 0) { + return {hex_table, 1}; + } + + int i = 0; + while (num) { + ans[i++] = hex_table[num & 15]; + num = num >> 4; + } + ans[i] = '\0'; + + // reverse + for (int k = 0, j = i - 1; k <= j && k <= 16; k++, j--) { + char tmp = ans[j]; + ans[j] = ans[k]; + ans[k] = tmp; + } + + return {ans, static_cast<size_t>(i)}; + } + + static Status vector(ColumnPtr argument_column, size_t input_rows_count, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { + const auto* str_col = check_and_get_column<ColumnVector<Int64>>(argument_column.get()); + auto& data = str_col->get_data(); + + res_offsets.resize(input_rows_count); + char ans[17]; + for (size_t i = 0; i < input_rows_count; ++i) { + StringOP::push_value_string(hex(data[i], ans), i, res_data, res_offsets); + } + return Status::OK(); + } +}; + +struct HexHLLImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<vectorized::DataTypeHLL>()}; + } + + static Status vector(ColumnPtr argument_column, size_t input_rows_count, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { + const auto* str_col = check_and_get_column<ColumnHLL>(argument_column.get()); + const auto& hll_data = str_col->get_data(); + res_offsets.resize(input_rows_count); + size_t total_length = 0, offset = 0; + std::string hll_str; + unsigned char* dst_data_ptr = nullptr; + + for (size_t i = 0; i < input_rows_count; ++i) { + hll_str.resize(hll_data[i].max_serialized_size(), '0'); + size_t actual_size = hll_data[i].serialize((uint8_t*)hll_str.data()); + hll_str.resize(actual_size); + total_length += actual_size; + + res_data.resize(total_length * 2 + (i + 1)); + dst_data_ptr = res_data.data() + offset; + hex_encode(reinterpret_cast<const unsigned char*>(hll_str.data()), hll_str.length(), + dst_data_ptr, offset); + res_offsets[i] = offset; + hll_str.clear(); + } + return Status::OK(); + } +}; + +void register_function_hex_variadic(SimpleFunctionFactory& factory) { + factory.register_function<FunctionHexVariadic<HexStringImpl>>(); + factory.register_function<FunctionHexVariadic<HexIntImpl>>(); + factory.register_function<FunctionHexVariadic<HexHLLImpl>>(); +} +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp new file mode 100644 index 0000000..4613fa9 --- /dev/null +++ b/be/src/vec/functions/function_regexp.cpp @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <re2/re2.h> + +#include <random> + +#include "exprs/string_functions.h" +#include "runtime/string_value.h" +#include "udf/udf.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function_string.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" +namespace doris::vectorized { + +template <typename Impl> +class FunctionRegexp : public IFunction { +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); } + + String get_name() const override { return name; } + + bool use_default_implementation_for_constants() const override { return false; } + + bool use_default_implementation_for_nulls() const override { return false; } + + size_t get_number_of_arguments() const override { return 3; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return Status::OK(); + } + + if (context->is_col_constant(1)) { + const auto pattern_col = context->get_constant_col(1)->column_ptr; + const auto& pattern = pattern_col->get_data_at(0).to_string_val(); + if (pattern.is_null) { + return Status::OK(); + } + + std::string error_str; + re2::RE2* re = StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + if (re == nullptr) { + context->set_error(error_str.c_str()); + return Status::InvalidArgument(error_str); + } + context->set_function_state(scope, re); + } + return Status::OK(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + size_t argument_size = arguments.size(); + ColumnPtr argument_columns[argument_size]; + auto result_null_map = ColumnUInt8::create(input_rows_count, 0); + auto result_data_column = ColumnString::create(); + + auto& result_data = result_data_column->get_chars(); + auto& result_offset = result_data_column->get_offsets(); + result_offset.resize(input_rows_count); + + for (int i = 0; i < argument_size; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + VectorizedUtils::update_null_map(result_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + Impl::execute_impl(context, argument_columns, input_rows_count, result_data, result_offset, + result_null_map->get_data()); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(result_data_column), std::move(result_null_map)); + return Status::OK(); + } + + Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::FRAGMENT_LOCAL) { + re2::RE2* re = reinterpret_cast<re2::RE2*>(context->get_function_state(scope)); + delete re; + } + return Status::OK(); + } +}; + +struct RegexpReplaceImpl { + static constexpr auto name = "regexp_replace"; + + static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); + const auto* replace_col = check_and_get_column<ColumnString>(argument_columns[2].get()); + + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + re2::RE2* re = reinterpret_cast<re2::RE2*>( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(i).to_string_val(); + re = StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + if (re == nullptr) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + scoped_re.reset(re); + } + + re2::StringPiece replace_str = + re2::StringPiece(replace_col->get_data_at(i).to_string()); + std::string result_str(str_col->get_data_at(i).to_string()); + re2::RE2::GlobalReplace(&result_str, *re, replace_str); + StringOP::push_value_string(result_str, i, result_data, result_offset); + } + + return Status::OK(); + } +}; + +struct RegexpExtractImpl { + static constexpr auto name = "regexp_extract"; + + static Status execute_impl(FunctionContext* context, ColumnPtr argument_columns[], + size_t input_rows_count, ColumnString::Chars& result_data, + ColumnString::Offsets& result_offset, NullMap& null_map) { + const auto* str_col = check_and_get_column<ColumnString>(argument_columns[0].get()); + const auto* pattern_col = check_and_get_column<ColumnString>(argument_columns[1].get()); + const auto* index_col = + check_and_get_column<ColumnVector<Int64>>(argument_columns[2].get()); + for (int i = 0; i < input_rows_count; ++i) { + if (null_map[i]) { + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + const auto& index_data = index_col->get_int(i); + if (index_data < 0) { + StringOP::push_empty_string(i, result_data, result_offset); + continue; + } + re2::RE2* re = reinterpret_cast<re2::RE2*>( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + std::unique_ptr<re2::RE2> scoped_re; + if (re == nullptr) { + std::string error_str; + const auto& pattern = pattern_col->get_data_at(i).to_string_val(); + re = StringFunctions::compile_regex(pattern, &error_str, StringVal::null()); + if (re == nullptr) { + context->add_warning(error_str.c_str()); + StringOP::push_null_string(i, result_data, result_offset, null_map); + continue; + } + scoped_re.reset(re); + } + const auto& str = str_col->get_data_at(i); + re2::StringPiece str_sp = re2::StringPiece(str.data,str.size); + + int max_matches = 1 + re->NumberOfCapturingGroups(); + if (index_data >= max_matches) { + StringOP::push_empty_string(i, result_data, result_offset); + continue; + } + + std::vector<re2::StringPiece> matches(max_matches); + bool success = re->Match(str_sp, 0, str.size, re2::RE2::UNANCHORED, &matches[0], + max_matches); + if (!success) { + StringOP::push_empty_string(i, result_data, result_offset); + continue; + } + const re2::StringPiece& match = matches[index_data]; + StringOP::push_value_string(std::string_view(match.data(), match.size()), i, + result_data, result_offset); + } + return Status::OK(); + } +}; + +void register_function_regexp_extract(SimpleFunctionFactory& factory) { + factory.register_function<FunctionRegexp<RegexpReplaceImpl>>(); + factory.register_function<FunctionRegexp<RegexpExtractImpl>>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 926e8ed..5d84f01 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -261,43 +261,6 @@ struct ReverseImpl { } }; -struct HexStringName { - static constexpr auto name = "hex"; -}; - -struct HexStringImpl { - static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } - - static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { - auto rows_count = offsets.size(); - dst_offsets.resize(rows_count); - dst_data.resize(data.size() * 2); - - size_t offset = 0; - auto dst_data_ptr = dst_data.data(); - for (int i = 0; i < rows_count; ++i) { - auto source = reinterpret_cast<const unsigned char*>(&data[offsets[i - 1]]); - size_t srclen = offsets[i] - offsets[i - 1] - 1; - - if (srclen == 0) { - DCHECK(*source == '\0'); - *dst_data_ptr = '\0'; - dst_data_ptr++; - offset++; - } else { - simd::VStringFunctions::hex_encode(source, srclen, - reinterpret_cast<char*>(dst_data_ptr)); - dst_data_ptr[srclen * 2] = '\0'; - dst_data_ptr += (srclen * 2 + 1); - offset += (srclen * 2 + 1); - } - dst_offsets[i] = offset; - } - return Status::OK(); - } -}; - struct NameToLower { static constexpr auto name = "lower"; }; @@ -479,86 +442,6 @@ struct StringSpace { } }; -struct AesEncryptImpl { - static constexpr auto name = "aes_encrypt"; - using Chars = ColumnString::Chars; - using Offsets = ColumnString::Offsets; - using ReturnType = DataTypeString; - using ColumnType = ColumnString; - static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, - const Chars& rdata, const Offsets& roffsets, Chars& res_data, - Offsets& res_offsets, NullMap& null_map_data) { - DCHECK_EQ(loffsets.size(), roffsets.size()); - size_t input_rows_count = loffsets.size(); - res_offsets.resize(input_rows_count); - - for (size_t i = 0; i < input_rows_count; ++i) { - int l_size = loffsets[i] - loffsets[i - 1] - 1; - const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); - - int r_size = roffsets[i] - roffsets[i - 1] - 1; - const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); - - if (*l_raw == '\0' || l_size == 0) { - StringOP::push_null_string(i, res_data, res_offsets, null_map_data); - continue; - } - - int cipher_len = l_size + 16; - char p[cipher_len]; - - int outlen = EncryptionUtil::encrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, - (unsigned char*)r_raw, r_size, NULL, true, - (unsigned char*)p); - if (outlen < 0) { - StringOP::push_null_string(i, res_data, res_offsets, null_map_data); - } else { - StringOP::push_value_string(std::string_view(p, outlen), i, res_data, res_offsets); - } - } - } -}; - -struct AesDecryptImpl { - static constexpr auto name = "aes_decrypt"; - using Chars = ColumnString::Chars; - using Offsets = ColumnString::Offsets; - using ReturnType = DataTypeString; - using ColumnType = ColumnString; - static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, - const Chars& rdata, const Offsets& roffsets, Chars& res_data, - Offsets& res_offsets, NullMap& null_map_data) { - DCHECK_EQ(loffsets.size(), roffsets.size()); - size_t input_rows_count = loffsets.size(); - res_offsets.resize(input_rows_count); - - for (size_t i = 0; i < input_rows_count; ++i) { - int l_size = loffsets[i] - loffsets[i - 1] - 1; - const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); - - int r_size = roffsets[i] - roffsets[i - 1] - 1; - const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); - - if (*l_raw == '\0' || l_size == 0) { - StringOP::push_null_string(i, res_data, res_offsets, null_map_data); - continue; - } - - int cipher_len = l_size; - char p[cipher_len]; - - int outlen = EncryptionUtil::decrypt(AES_128_ECB, (unsigned char*)l_raw, l_size, - (unsigned char*)r_raw, r_size, NULL, true, - (unsigned char*)p); - if (outlen < 0) { - StringOP::push_null_string(i, res_data, res_offsets, null_map_data); - } else { - StringOP::push_value_string(std::string_view(p, outlen), i, res_data, res_offsets); - } - } - } -}; - struct ToBase64Impl { static constexpr auto name = "to_base64"; using ReturnType = DataTypeString; @@ -724,8 +607,6 @@ using FunctionStringFindInSet = using FunctionReverse = FunctionStringToString<ReverseImpl, NameReverse>; -using FunctionHexString = FunctionStringToString<HexStringImpl, HexStringName>; - using FunctionUnHex = FunctionStringOperateToNullType<UnHexImpl>; using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>, NameToLower>; @@ -738,10 +619,6 @@ using FunctionRTrim = FunctionStringToString<TrimImpl<false, true>, NameRTrim>; using FunctionTrim = FunctionStringToString<TrimImpl<true, true>, NameTrim>; -using FunctionAesEncrypt = FunctionBinaryStringOperateToNullType<AesEncryptImpl>; - -using FunctionAesDecrypt = FunctionBinaryStringOperateToNullType<AesDecryptImpl>; - using FunctionToBase64 = FunctionStringOperateToNullType<ToBase64Impl>; using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>; @@ -764,7 +641,6 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionStringLocate>(); factory.register_function<FunctionStringLocatePos>(); factory.register_function<FunctionReverse>(); - factory.register_function<FunctionHexString>(); factory.register_function<FunctionUnHex>(); factory.register_function<FunctionToLower>(); factory.register_function<FunctionToUpper>(); @@ -782,25 +658,25 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionStringRepeat>(); factory.register_function<FunctionStringLPad>(); factory.register_function<FunctionStringRPad>(); - factory.register_function<FunctionAesEncrypt>(); - factory.register_function<FunctionAesDecrypt>(); factory.register_function<FunctionToBase64>(); factory.register_function<FunctionFromBase64>(); factory.register_function<FunctionSplitPart>(); - factory.register_function<FunctionStringMd5sum>(); + factory.register_function<FunctionStringMd5AndSM3<MD5Sum>>(); factory.register_function<FunctionStringParseUrl>(); factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>(); factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>(); factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>(); factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>(); + factory.register_function<FunctionStringMd5AndSM3<SM3Sum>>(); factory.register_alias(FunctionLeft::name, "strleft"); factory.register_alias(FunctionRight::name, "strright"); factory.register_alias(SubstringUtil::name, "substr"); factory.register_alias(FunctionToLower::name, "lcase"); factory.register_alias(FunctionToUpper::name, "ucase"); - factory.register_alias(FunctionStringMd5sum::name, "md5"); + factory.register_alias(FunctionStringMd5AndSM3<MD5Sum>::name, "md5"); factory.register_alias(FunctionStringUTF8Length::name, "character_length"); + factory.register_alias(FunctionStringMd5AndSM3<SM3Sum>::name, "sm3"); } } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index c14e639..613d4ca 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -30,6 +30,7 @@ #include "runtime/string_value.hpp" #include "udf/udf.h" #include "util/md5.h" +#include "util/sm3.h" #include "util/url_parser.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" @@ -922,10 +923,21 @@ public: } }; -class FunctionStringMd5sum : public IFunction { -public: +struct SM3Sum { + static constexpr auto name = "sm3sum"; + using ObjectData = SM3Digest; +}; + +struct MD5Sum { static constexpr auto name = "md5sum"; - static FunctionPtr create() { return std::make_shared<FunctionStringMd5sum>(); } + using ObjectData = Md5Digest; +}; + +template <typename Impl> +class FunctionStringMd5AndSM3 : public IFunction { +public: + static constexpr auto name = Impl::name; + static FunctionPtr create() { return std::make_shared<FunctionStringMd5AndSM3>(); } String get_name() const override { return name; } size_t get_number_of_arguments() const override { return 0; } bool is_variadic() const override { return true; } @@ -964,7 +976,8 @@ public: res_offset.resize(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { - Md5Digest digest; + using ObjectData = typename Impl::ObjectData; + ObjectData digest; for (size_t j = 0; j < offsets_list.size(); ++j) { auto& current_offsets = *offsets_list[j]; auto& current_chars = *chars_list[j]; diff --git a/be/src/vec/functions/math.cpp b/be/src/vec/functions/math.cpp index a265168..92ef20f 100644 --- a/be/src/vec/functions/math.cpp +++ b/be/src/vec/functions/math.cpp @@ -167,56 +167,6 @@ struct CeilName { }; using FunctionCeil = FunctionMathUnary<UnaryFunctionVectorized<CeilName, std::ceil, DataTypeInt64>>; -struct HexIntName { - static constexpr auto name = "hex"; -}; - -struct HexIntImpl { - using ReturnType = DataTypeString; - static constexpr auto TYPE_INDEX = TypeIndex::Int64; - using Type = Int64; - using ReturnColumnType = ColumnString; - - static DataTypes get_variadic_argument_types() { - return {std::make_shared<vectorized::DataTypeInt64>()}; - } - - static std::string_view hex(uint64_t num, char* ans){ - static constexpr auto hex_table = "0123456789ABCDEF"; - // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F' - if (num == 0) { return {hex_table, 1};} - - int i = 0; - while (num) { - ans[i++] = hex_table[num & 15]; - num = num >> 4; - } - ans[i] = '\0'; - - // reverse - for (int k = 0, j = i - 1; k <= j && k <= 16; k++, j--) { - char tmp = ans[j]; - ans[j] = ans[k]; - ans[k] = tmp; - } - - return {ans, static_cast<size_t>(i)}; - } - - static Status vector(const ColumnInt64::Container& data, ColumnString::Chars& res_data, - ColumnString::Offsets& res_offsets) { - res_offsets.resize(data.size()); - size_t input_size = res_offsets.size(); - char ans[17]; - for (size_t i = 0; i < input_size; ++i) { - StringOP::push_value_string(hex(data[i], ans), i, res_data, res_offsets); - } - return Status::OK(); - } -}; - -using FunctionHexInt = FunctionUnaryToType<HexIntImpl, HexIntName>; - template <typename A> struct SignImpl { using ResultType = Int8; @@ -459,7 +409,6 @@ void register_function_math(SimpleFunctionFactory& factory) { factory.register_function<FunctionCeil>(); factory.register_alias("ceil", "dceil"); factory.register_alias("ceil", "ceiling"); - factory.register_function<FunctionHexInt>(); factory.register_function<FunctionE>(); factory.register_function<FunctionLn>(); factory.register_alias("ln", "dlog1"); diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 7ab9061..f6fdb6c 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -74,6 +74,9 @@ void register_function_convert_tz(SimpleFunctionFactory& factory); void register_function_least_greast(SimpleFunctionFactory& factory); void register_function_fake(SimpleFunctionFactory& factory); +void register_function_encryption(SimpleFunctionFactory& factory); +void register_function_regexp_extract(SimpleFunctionFactory& factory); +void register_function_hex_variadic(SimpleFunctionFactory& factory); class SimpleFunctionFactory { using Creator = std::function<FunctionBuilderPtr()>; using FunctionCreators = phmap::flat_hash_map<std::string, Creator>; @@ -194,6 +197,9 @@ public: register_function_convert_tz(instance); register_function_least_greast(instance); register_function_fake(instance); + register_function_encryption(instance); + register_function_regexp_extract(instance); + register_function_hex_variadic(instance); }); return instance; } diff --git a/be/test/vec/function/function_like_test.cpp b/be/test/vec/function/function_like_test.cpp index e27c479..5674c07 100644 --- a/be/test/vec/function/function_like_test.cpp +++ b/be/test/vec/function/function_like_test.cpp @@ -106,6 +106,65 @@ TEST(FunctionLikeTest, regexp) { check_function<DataTypeUInt8, true>(func_name, input_types, data_set); } + +TEST(FunctionLikeTest, regexp_extract) { + std::string func_name = "regexp_extract"; + + DataSet data_set = { + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), std::string("x=([0-9]+)([a-z]+)"), (int64_t)0}, std::string("x=18abc")}, + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), std::string("^x=([a-z]+)([0-9]+)"),(int64_t)0}, std::string("x=a3")}, + {{std::string("x=a3&x=18abc&x=2&y=3&x=4"), std::string("^x=([a-z]+)([0-9]+)"),(int64_t)1}, std::string("a")}, + {{std::string("http://a.m.baidu.com/i41915173660.htm"), std::string("i([0-9]+)"),(int64_t)0}, std::string("i41915173660")}, + {{std::string("http://a.m.baidu.com/i41915173660.htm"), std::string("i([0-9]+)"),(int64_t)1}, std::string("41915173660")}, + + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"),(int64_t)0}, std::string("itde")}, + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"),(int64_t)1}, std::string("i")}, + {{std::string("hitdecisiondlist"), std::string("(i)(.*?)(e)"),(int64_t)2}, std::string("td")}, + // null + {{std::string("abc"), Null(), (int64_t)0}, Null()}, + {{Null(), std::string("i([0-9]+)"), (int64_t)0}, Null()}}; + + // pattern is constant value + InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted {TypeIndex::String}, TypeIndex::Int64}; + for (const auto& line : data_set) { + DataSet const_pattern_dataset = {line}; + check_function<DataTypeString, true>(func_name, const_pattern_input_types, + const_pattern_dataset); + } + + // pattern is not constant value + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String , TypeIndex::Int64}; + check_function<DataTypeString, true>(func_name, input_types, data_set); +} + +TEST(FunctionLikeTest, regexp_replace) { + std::string func_name = "regexp_replace"; + + DataSet data_set = { + {{std::string("2022-03-02"), std::string("-"), std::string("")}, std::string("20220302")}, + {{std::string("2022-03-02"), std::string(""),std::string("s")}, std::string("s2s0s2s2s-s0s3s-s0s2s")}, + {{std::string("100-200"), std::string("(\\d+)"),std::string("doris")}, std::string("doris-doris")}, + + {{std::string("a b c"), std::string(" "),std::string("-")}, std::string("a-b-c")}, + {{std::string("a b c"), std::string("(b)"),std::string("<\\1>")}, std::string("a <b> c")}, + {{std::string("qwewe"), std::string(""),std::string("true")}, std::string("trueqtruewtrueetruewtrueetrue")}, + // null + {{std::string("abc"), std::string("x=18abc"), Null()}, Null()}, + {{Null(), std::string("i([0-9]+)"), std::string("x=18abc")}, Null()}}; + + // pattern is constant value + InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted {TypeIndex::String}, TypeIndex::String}; + for (const auto& line : data_set) { + DataSet const_pattern_dataset = {line}; + check_function<DataTypeString, true>(func_name, const_pattern_input_types, + const_pattern_dataset); + } + + // pattern is not constant value + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String , TypeIndex::String}; + check_function<DataTypeString, true>(func_name, input_types, data_set); +} + } // namespace doris::vectorized int main(int argc, char** argv) { diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index e32d911..0a36af2 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -561,9 +561,54 @@ TEST(function_string_test, function_md5sum_test) { } } +TEST(function_string_test, function_sm3sum_test) { + std::string func_name = "sm3sum"; + + { + InputTypeSet input_types = {TypeIndex::String}; + DataSet data_set = { + {{std::string("asd你好")}, {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, + {{std::string("hello world")}, {std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}}, + {{std::string("HELLO,!^%")}, {std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}}, + {{std::string("")}, {std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}}, + {{std::string(" ")}, {std::string("2ae1d69bb8483e5944310c877573b21d0a420c3bf4a2a91b1a8370d760ba67c5")}}, + {{Null()}, {Null()}}, + {{std::string("MYtestSTR")}, {std::string("3155ae9f834cae035385fc15b69b6f2c051b91de943ea9a03ab8bfd497aef4c6")}}, + {{std::string("ò&ø")}, {std::string("aa47ac31c85aa819d4cc80c932e7900fa26a3073a67aa7eb011bc2ba4924a066")}}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + } + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + DataSet data_set = {{{std::string("asd"), std::string("你好")}, + {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, + {{std::string("hello "), std::string("world")}, + {std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}}, + {{std::string("HELLO "), std::string(",!^%")}, + {std::string("1f5866e786ebac9ffed0dbd8f2586e3e99d1d05f7efe7c5915478b57b7423570")}}, + {{Null(), std::string("HELLO")}, {Null()}}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + } + + { + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String}; + DataSet data_set = {{{std::string("a"), std::string("sd"), std::string("你好")}, + {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, + {{std::string(""), std::string(""), std::string("")}, + {std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}}, + {{std::string("HEL"), std::string("LO,!"), std::string("^%")}, + {std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}}, + {{Null(), std::string("HELLO"), Null()}, {Null()}}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + } +} + TEST(function_string_test, function_aes_encrypt_test) { std::string func_name = "aes_encrypt"; - +{ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; const char* key = "doris"; @@ -590,10 +635,43 @@ TEST(function_string_test, function_aes_encrypt_test) { check_function<DataTypeString, true>(func_name, input_types, data_set); } +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String, TypeIndex::String}; + const char* iv = "0123456789abcdef"; + const char* mode = "AES_256_ECB"; + const char* key = "vectorized"; + const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee", ""}; + std::string r[5]; + + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + int iv_len = 32; + std::unique_ptr<char[]> init_vec; + init_vec.reset(new char[iv_len]); + std::memset(init_vec.get(), 0, strlen(iv) + 1); + memcpy(init_vec.get(), iv, strlen(iv)); + int outlen = EncryptionUtil::encrypt(AES_256_ECB, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), init_vec.get(), true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + + DataSet data_set = {{{std::string(src[0]), std::string(key), std::string(iv), std::string(mode)}, r[0]}, + {{std::string(src[1]), std::string(key), std::string(iv), std::string(mode)}, r[1]}, + {{std::string(src[2]), std::string(key), std::string(iv), std::string(mode)}, r[2]}, + {{std::string(src[3]), std::string(key), std::string(iv), std::string(mode)}, r[3]}, + {{std::string(src[4]), std::string(key), std::string(iv), std::string(mode)}, r[4]}, + {{std::string(src[5]), std::string(key), std::string(iv), std::string(mode)}, Null()}, + {{Null(), std::string(key), std::string(iv), std::string(mode)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} +} TEST(function_string_test, function_aes_decrypt_test) { std::string func_name = "aes_decrypt"; - +{ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; const char* key = "doris"; @@ -619,6 +697,165 @@ TEST(function_string_test, function_aes_decrypt_test) { check_function<DataTypeString, true>(func_name, input_types, data_set); } +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String, TypeIndex::String}; + const char* key = "vectorized"; + const char* iv = "0123456789abcdef"; + const char* mode = "AES_128_OFB"; + const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"}; + + std::string r[5]; + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + int iv_len = 32; + std::unique_ptr<char[]> init_vec; + init_vec.reset(new char[iv_len]); + std::memset(init_vec.get(), 0, strlen(iv) + 1); + memcpy(init_vec.get(), iv, strlen(iv)); + int outlen = EncryptionUtil::encrypt(AES_128_OFB, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), init_vec.get(), true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + DataSet data_set = {{{r[0], std::string(key), std::string(iv), std::string(mode)}, std::string(src[0])}, + {{r[1], std::string(key), std::string(iv), std::string(mode)}, std::string(src[1])}, + {{r[2], std::string(key), std::string(iv), std::string(mode)}, std::string(src[2])}, + {{r[3], std::string(key), std::string(iv), std::string(mode)}, std::string(src[3])}, + {{r[4], std::string(key), std::string(iv), std::string(mode)}, std::string(src[4])}, + {{Null(), std::string(key), std::string(iv), std::string(mode)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} +} + +TEST(function_string_test, function_sm4_encrypt_test) { + std::string func_name = "sm4_encrypt"; +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + const char* key = "doris"; + const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee", ""}; + std::string r[5]; + + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + + int outlen = EncryptionUtil::encrypt(SM4_128_ECB, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), NULL, true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + + DataSet data_set = {{{std::string(src[0]), std::string(key)}, r[0]}, + {{std::string(src[1]), std::string(key)}, r[1]}, + {{std::string(src[2]), std::string(key)}, r[2]}, + {{std::string(src[3]), std::string(key)}, r[3]}, + {{std::string(src[4]), std::string(key)}, r[4]}, + {{std::string(src[5]), std::string(key)}, Null()}, + {{Null(), std::string(key)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} + +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String, TypeIndex::String}; + + const char* key = "vectorized"; + const char* iv = "0123456789abcdef"; + const char* mode = "SM4_128_CTR"; + const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee", ""}; + std::string r[5]; + + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + int iv_len = 32; + std::unique_ptr<char[]> init_vec; + init_vec.reset(new char[iv_len]); + std::memset(init_vec.get(), 0, strlen(iv) + 1); + memcpy(init_vec.get(), iv, strlen(iv)); + int outlen = EncryptionUtil::encrypt(SM4_128_CTR, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), init_vec.get(), true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + + DataSet data_set = {{{std::string(src[0]), std::string(key), std::string(iv), std::string(mode)}, r[0]}, + {{std::string(src[1]), std::string(key), std::string(iv), std::string(mode)}, r[1]}, + {{std::string(src[2]), std::string(key), std::string(iv), std::string(mode)}, r[2]}, + {{std::string(src[3]), std::string(key), std::string(iv), std::string(mode)}, r[3]}, + {{std::string(src[4]), std::string(key), std::string(iv), std::string(mode)}, r[4]}, + {{std::string(src[5]), std::string(key), std::string(iv), std::string(mode)}, Null()}, + {{Null(), std::string(key), std::string(iv), std::string(mode)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} +} + +TEST(function_string_test, function_sm4_decrypt_test) { + std::string func_name = "sm4_decrypt"; +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + const char* key = "doris"; + const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"}; + std::string r[5]; + + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + + int outlen = EncryptionUtil::encrypt(SM4_128_ECB, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), NULL, true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + + DataSet data_set = {{{r[0], std::string(key)}, std::string(src[0])}, + {{r[1], std::string(key)}, std::string(src[1])}, + {{r[2], std::string(key)}, std::string(src[2])}, + {{r[3], std::string(key)}, std::string(src[3])}, + {{r[4], std::string(key)}, std::string(src[4])}, + {{Null(), std::string(key)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} + +{ + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String, TypeIndex::String}; + + const char* key = "vectorized"; + const char* iv = "0123456789abcdef"; + const char* mode = "SM4_128_OFB"; + const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"}; + std::string r[5]; + + for (int i = 0; i < 5; i++) { + int cipher_len = strlen(src[i]) + 16; + char p[cipher_len]; + int iv_len = 32; + std::unique_ptr<char[]> init_vec; + init_vec.reset(new char[iv_len]); + std::memset(init_vec.get(), 0, strlen(iv) + 1); + memcpy(init_vec.get(), iv, strlen(iv)); + int outlen = EncryptionUtil::encrypt(SM4_128_OFB, (unsigned char*)src[i], strlen(src[i]), + (unsigned char*)key, strlen(key), init_vec.get(), true, + (unsigned char*)p); + r[i] = std::string(p, outlen); + } + + DataSet data_set = {{{r[0], std::string(key), std::string(iv), std::string(mode)}, std::string(src[0])}, + {{r[1], std::string(key), std::string(iv), std::string(mode)}, std::string(src[1])}, + {{r[2], std::string(key), std::string(iv), std::string(mode)}, std::string(src[2])}, + {{r[3], std::string(key), std::string(iv), std::string(mode)}, std::string(src[3])}, + {{r[4], std::string(key), std::string(iv), std::string(mode)}, std::string(src[4])}, + {{Null(), Null(), std::string(iv), std::string(mode)}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} +} TEST(function_string_test, function_parse_url_test) { std::string func_name = "parse_url"; diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 8bd6829..bf1a895 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -897,14 +897,14 @@ visible_functions = [ '_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' - '15FunctionContextENS2_18FunctionStateScopeE', '', ''], + '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], [['regexp_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR'], '_ZN5doris15StringFunctions14regexp_replaceEPN9doris_udf' '15FunctionContextERKNS1_9StringValES6_S6_', '_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' - '15FunctionContextENS2_18FunctionStateScopeE', '', ''], + '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], [['concat'], 'VARCHAR', ['VARCHAR', '...'], '_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], @@ -1019,14 +1019,14 @@ visible_functions = [ '_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' - '15FunctionContextENS2_18FunctionStateScopeE', '', ''], + '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], [['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'], '_ZN5doris15StringFunctions14regexp_replaceEPN9doris_udf' '15FunctionContextERKNS1_9StringValES6_S6_', '_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf' '15FunctionContextENS2_18FunctionStateScopeE', '_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf' - '15FunctionContextENS2_18FunctionStateScopeE', '', ''], + '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'], [['concat'], 'STRING', ['STRING', '...'], '_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], @@ -1252,22 +1252,22 @@ visible_functions = [ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['aes_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['aes_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], '_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], - [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], - '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], - [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], - '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], - [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], - '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], - [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], - '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], + '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], + '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], + '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'], + '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['from_base64'], 'VARCHAR', ['VARCHAR'], '_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf' '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'], @@ -1276,25 +1276,25 @@ visible_functions = [ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['aes_decrypt'], 'STRING', ['STRING', 'STRING'], '_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], + '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['aes_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['aes_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], '_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], - [['sm4_encrypt'], 'STRING', ['STRING', 'STRING'], - '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], - [['sm4_decrypt'], 'STRING', ['STRING', 'STRING'], - '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], - [['sm4_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], - '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], - [['sm4_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], - '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''], + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_encrypt'], 'STRING', ['STRING', 'STRING'], + '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_decrypt'], 'STRING', ['STRING', 'STRING'], + '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], + '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['sm4_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'], + '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf' + '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec', 'ALWAYS_NULLABLE'], [['from_base64'], 'STRING', ['STRING'], '_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf' '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'], @@ -1316,13 +1316,13 @@ visible_functions = [ '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], [['sm3'], 'VARCHAR', ['VARCHAR'], - '_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''], + '_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['sm3sum'], 'VARCHAR', ['VARCHAR', '...'], - '_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''], + '_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], [['sm3'], 'VARCHAR', ['STRING'], - '_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''], + '_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['sm3sum'], 'VARCHAR', ['STRING', '...'], - '_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''], + '_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''], # geo functions [['ST_Point'], 'VARCHAR', ['DOUBLE', 'DOUBLE'], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org