This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new df47b6941d [feature-wip](array-type) support the array type in reverse function (#11213) df47b6941d is described below commit df47b6941dfa574f5f29c03eb2e6961d3a306465 Author: carlvinhust2012 <huchengha...@126.com> AuthorDate: Tue Aug 9 20:49:09 2022 +0800 [feature-wip](array-type) support the array type in reverse function (#11213) Co-authored-by: hucheng01 <huchen...@baidu.com> --- .../vec/functions/array/function_array_reverse.h | 92 ++++++++++++++++++++++ be/src/vec/functions/function_reverse.h | 66 ++++++++++++++++ be/src/vec/functions/function_string.cpp | 28 +------ be/src/vec/functions/function_string.h | 19 +++++ .../sql-functions/string-functions/reverse.md | 37 ++++++++- .../sql-functions/string-functions/reverse.md | 37 ++++++++- gensrc/script/doris_builtins_functions.py | 24 +++++- .../array_functions/test_array_functions.out | 10 +++ .../array_functions/test_array_functions.groovy | 11 +-- 9 files changed, 285 insertions(+), 39 deletions(-) diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h new file mode 100644 index 0000000000..bc6891a29b --- /dev/null +++ b/be/src/vec/functions/array/function_array_reverse.h @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arrayReverse.cpp +// and modified by Doris +#pragma once + +#include "vec/columns/column_array.h" +#include "vec/columns/column_const.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/array/function_array_utils.h" + +namespace doris::vectorized { +
+// Implementation of reverse() for array columns: every row keeps its length, with its
+// elements emitted in the opposite order.
+struct ArrayReverseImpl {
+    // Reads the array column at block[arguments[0]] (a const column is expanded first),
+    // reverses each row and stores the assembled array column at block[result].
+    // Returns RuntimeError naming the argument type when extract_column_array_info()
+    // rejects the column or _execute_internal() reports failure, Status::OK() otherwise.
+    static Status _execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                           size_t input_rows_count) {
+        const auto& arg = block.get_by_position(arguments[0]);
+        ColumnPtr src_column = arg.column->convert_to_full_column_if_const();
+        ColumnArrayExecutionData src;
+        if (!extract_column_array_info(*src_column, src)) {
+            return Status::RuntimeError(
+                    fmt::format("execute failed, unsupported types for function {}({})", "reverse",
+                                arg.type->get_name()));
+        }
+
+        // The destination gets a nested null map exactly when the source has one.
+        const bool is_nullable = src.nested_nullmap_data != nullptr;
+        ColumnArrayMutableData dst = create_mutable_data(src.nested_col, is_nullable);
+        dst.offsets_ptr->reserve(input_rows_count);
+
+        const bool ok = _execute_internal(*src.nested_col, *src.offsets_ptr, *dst.nested_col,
+                                          *dst.offsets_ptr, src.nested_nullmap_data,
+                                          dst.nested_nullmap_data);
+        if (!ok) {
+            return Status::RuntimeError(
+                    fmt::format("execute failed or unsupported types for function {}({})",
+                                "reverse", arg.type->get_name()));
+        }
+
+        ColumnPtr res_column = assemble_column_array(dst);
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+
+    // For every source row, appends its elements to dest_column from last to first and
+    // copies the row's end offset to dest_offsets. When dest_null_map is supplied, one
+    // flag is pushed per appended element. Always returns true.
+    static bool _execute_internal(const IColumn& src_column,
+                                  const ColumnArray::Offsets& src_offsets, IColumn& dest_column,
+                                  ColumnArray::Offsets& dest_offsets, const UInt8* src_null_map,
+                                  ColumnUInt8::Container* dest_null_map) {
+        ColumnArray::Offset row_begin = 0;
+
+        for (auto row_end : src_offsets) {
+            const size_t array_size = row_end - row_begin;
+            // k counts down, so src_idx runs from the row's last element to its first.
+            for (size_t k = array_size; k > 0; --k) {
+                const size_t src_idx = row_begin + k - 1;
+                const bool is_null = src_null_map && src_null_map[src_idx];
+                if (is_null) {
+                    DCHECK(dest_null_map != nullptr);
+                    // A null element still gets a default value, so the nested column
+                    // receives one entry per null-map entry.
+                    dest_column.insert_default();
+                    dest_null_map->push_back(true);
+                } else {
+                    dest_column.insert_from(src_column, src_idx);
+                    if (dest_null_map) {
+                        dest_null_map->push_back(false);
+                    }
+                }
+            }
+
+            dest_offsets.push_back(row_end);
+            row_begin = row_end;
+        }
+
+        return true;
+    }
+};
+ +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/function_reverse.h b/be/src/vec/functions/function_reverse.h new file mode 100644 index 0000000000..f60a390256 --- /dev/null +++ b/be/src/vec/functions/function_reverse.h @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include "vec/functions/array/function_array_reverse.h" +#include "vec/functions/function_string.h" + +namespace doris::vectorized { +
+// reverse() for both string and array arguments: string columns are handled by
+// ReverseImpl::vector, array columns by ArrayReverseImpl::_execute.
+class FunctionReverseCommon : public IFunction {
+public:
+    static constexpr auto name = "reverse";
+
+    static FunctionPtr create() { return std::make_shared<FunctionReverseCommon>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    bool get_is_injective(const Block&) override { return false; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // The result type is the argument type. The argument type is only validated by a
+    // DCHECK here; an unsupported column is rejected with an error in execute_impl().
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_string_or_fixed_string(arguments[0]) || is_array(arguments[0]))
+                << fmt::format("Illegal type {} used for argument of function {}",
+                               arguments[0]->get_name(), get_name());
+
+        return arguments[0];
+    }
+
+    // Reverses the column at block[arguments[0]] into block[result].
+    // Returns the failing Status of the string or array implementation, RuntimeError
+    // when the column is neither a ColumnString nor a ColumnArray, Status::OK() otherwise.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnPtr src_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+        if (const ColumnString* col_string = check_and_get_column<ColumnString>(src_column.get())) {
+            auto col_res = ColumnString::create();
+            // Propagate a failure instead of silently dropping the returned Status.
+            const Status st =
+                    ReverseImpl::vector(col_string->get_chars(), col_string->get_offsets(),
+                                        col_res->get_chars(), col_res->get_offsets());
+            if (!st.ok()) {
+                return st;
+            }
+            block.replace_by_position(result, std::move(col_res));
+            return Status::OK();
+        }
+
+        if (check_column<ColumnArray>(src_column.get())) {
+            return ArrayReverseImpl::_execute(block, arguments, result, input_rows_count);
+        }
+
+        return Status::RuntimeError("Illegal column {} used for argument of function {}",
+                                    block.get_by_position(arguments[0]).column->get_name(),
+                                    get_name());
+    }
+};
+ +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 730a6e4742..1a327efd72 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -25,9 +25,9 @@ #include "runtime/string_search.hpp" #include "util/encryption_util.h" -#include "util/simd/vstring_function.h" #include "util/url_coding.h" #include "vec/common/pod_array_fwd.h" +#include "vec/functions/function_reverse.h" #include "vec/functions/function_string_to_string.h" #include "vec/functions/function_totype.h" #include "vec/functions/simple_function_factory.h" @@ -239,28 +239,6 @@ struct StringFunctionImpl { } }; -struct NameReverse { - static constexpr auto name = "reverse"; -}; - -struct ReverseImpl { - static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, - ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { - auto rows_count = offsets.size(); - res_offsets.resize(rows_count); - res_data.reserve(data.size()); - for (int i = 0; i < rows_count; ++i) { - auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); - int64_t src_len = offsets[i] - offsets[i - 1] - 1; - char dst[src_len]; - simd::VStringFunctions::reverse(StringVal((uint8_t*)src_str, src_len), - StringVal((uint8_t*)dst, src_len)); - StringOP::push_value_string(std::string_view(dst, src_len), i, res_data, res_offsets);
- } - return Status::OK(); - } -}; - struct NameToLower { static constexpr auto name = "lower"; }; @@ -605,8 +583,6 @@ using FunctionStringLocate = using FunctionStringFindInSet = FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>; -using FunctionReverse = FunctionStringToString<ReverseImpl, NameReverse>; - using FunctionUnHex = FunctionStringOperateToNullType<UnHexImpl>; using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>, NameToLower>; @@ -640,7 +616,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionStringFindInSet>(); factory.register_function<FunctionStringLocate>(); factory.register_function<FunctionStringLocatePos>(); - factory.register_function<FunctionReverse>(); + factory.register_function<FunctionReverseCommon>(); factory.register_function<FunctionUnHex>(); factory.register_function<FunctionToLower>(); factory.register_function<FunctionToUpper>(); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index afe43c0833..e0f650d30c 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -28,6 +28,7 @@ #include "exprs/string_functions.h" #include "udf/udf.h" #include "util/md5.h" +#include "util/simd/vstring_function.h" #include "util/sm3.h" #include "util/url_parser.h" #include "vec/columns/column_array.h" @@ -1437,4 +1438,22 @@ private: } };
+// Reverses every value of a string column.
+struct ReverseImpl {
+    // data / offsets: characters and per-row end offsets of the source column.
+    // res_data / res_offsets: receive the reversed values, one per source row.
+    // Each value's length is taken as (offsets[i] - offsets[i - 1] - 1), i.e. the last
+    // stored byte of a row is not part of the string (presumably a terminator --
+    // confirm against ColumnString). Always returns Status::OK().
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
+        // Signed on purpose: row 0 reads offsets[-1], so the index must not wrap.
+        // NOTE(review): that read relies on the offsets container tolerating index -1
+        // and yielding 0 there -- confirm.
+        const auto rows_count = static_cast<ssize_t>(offsets.size());
+        res_offsets.resize(rows_count);
+        res_data.reserve(data.size());
+
+        // Heap-backed scratch buffer reused across rows. It replaces a per-row
+        // variable-length stack array, which is not standard C++ and can overflow the
+        // stack on long strings.
+        ColumnString::Chars reversed;
+        for (ssize_t i = 0; i < rows_count; ++i) {
+            // StringVal takes a non-const pointer, hence the const_cast; the source is
+            // only expected to be read (reverse() fills its second argument).
+            auto* src_str = const_cast<uint8_t*>(
+                    reinterpret_cast<const uint8_t*>(&data[offsets[i - 1]]));
+            const int64_t src_len = offsets[i] - offsets[i - 1] - 1;
+            reversed.resize(src_len);
+            auto* dst = reinterpret_cast<uint8_t*>(reversed.data());
+            simd::VStringFunctions::reverse(StringVal(src_str, src_len),
+                                            StringVal(dst, src_len));
+            StringOP::push_value_string(
+                    std::string_view(reinterpret_cast<const char*>(dst), src_len), i, res_data,
+                    res_offsets);
+        }
+        return Status::OK();
+    }
+};
+ } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/reverse.md b/docs/en/docs/sql-manual/sql-functions/string-functions/reverse.md index bd7c5889ee..a349f7f673 100644 --- a/docs/en/docs/sql-manual/sql-functions/string-functions/reverse.md +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/reverse.md @@ -29,9 +29,13 @@ under the License. #### Syntax `VARCHAR reverse(VARCHAR str)` +`ARRAY<T> reverse(ARRAY<T> arr)` +The REVERSE() function reverses a string or array and returns the result. -The REVERSE() function reverses a string and returns the result. +### notice + +`For the array type, only supported in vectorized engine` ### example @@ -52,5 +56,34 @@ mysql> SELECT REVERSE('你好'); +------------------+ 1 row in set (0.00 sec) ``` + +mysql> set enable_array_type=true; + +mysql> set enable_vectorized_engine=true; + +mysql> select k1, k2, reverse(k2) from array_test order by k1; ++------+-----------------------------+-----------------------------+ +| k1 | k2 | reverse(`k2`) | ++------+-----------------------------+-----------------------------+ +| 1 | [1, 2, 3, 4, 5] | [5, 4, 3, 2, 1] | +| 2 | [6, 7, 8] | [8, 7, 6] | +| 3 | [] | [] | +| 4 | NULL | NULL | +| 5 | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [1, 2, 3, 4, 5, 4, 3, 2, 1] | +| 6 | [1, 2, 3, NULL] | [NULL, 3, 2, 1] | +| 7 | [4, 5, 6, NULL, NULL] | [NULL, NULL, 6, 5, 4] | ++------+-----------------------------+-----------------------------+ + +mysql> select k1, k2, reverse(k2) from array_test01 order by k1; ++------+-----------------------------------+-----------------------------------+ +| k1 | k2 | reverse(`k2`) | ++------+-----------------------------------+-----------------------------------+ +| 1 | ['a', 'b', 'c', 'd'] | ['d', 'c',
'b', 'a'] | +| 2 | ['e', 'f', 'g', 'h'] | ['h', 'g', 'f', 'e'] | +| 3 | [NULL, 'a', NULL, 'b', NULL, 'c'] | ['c', NULL, 'b', NULL, 'a', NULL] | +| 4 | ['d', 'e', NULL, ' '] | [' ', NULL, 'e', 'd'] | +| 5 | [' ', NULL, 'f', 'g'] | ['g', 'f', NULL, ' '] | ++------+-----------------------------------+-----------------------------------+ +``` ### keywords -REVERSE +REVERSE, ARRAY diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/reverse.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/reverse.md index 19477b05b4..b234fede61 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/reverse.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/reverse.md @@ -29,9 +29,13 @@ under the License. #### Syntax `VARCHAR reverse(VARCHAR str)` +`ARRAY<T> reverse(ARRAY<T> arr)` +将字符串或数组反转,返回的字符串或者数组的顺序和原来的顺序相反。 -将字符串反转,返回的字符串的顺序和源字符串的顺序相反。 +### notice + +`对于数组类型,仅支持向量化引擎中使用` ### example @@ -52,5 +56,34 @@ mysql> SELECT REVERSE('你好'); +------------------+ 1 row in set (0.00 sec) ``` + +mysql> set enable_array_type=true; + +mysql> set enable_vectorized_engine=true; + +mysql> select k1, k2, reverse(k2) from array_test order by k1; ++------+-----------------------------+-----------------------------+ +| k1 | k2 | reverse(`k2`) | ++------+-----------------------------+-----------------------------+ +| 1 | [1, 2, 3, 4, 5] | [5, 4, 3, 2, 1] | +| 2 | [6, 7, 8] | [8, 7, 6] | +| 3 | [] | [] | +| 4 | NULL | NULL | +| 5 | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [1, 2, 3, 4, 5, 4, 3, 2, 1] | +| 6 | [1, 2, 3, NULL] | [NULL, 3, 2, 1] | +| 7 | [4, 5, 6, NULL, NULL] | [NULL, NULL, 6, 5, 4] | ++------+-----------------------------+-----------------------------+ + +mysql> select k1, k2, reverse(k2) from array_test01 order by k1; ++------+-----------------------------------+-----------------------------------+ +| k1 | k2 | reverse(`k2`) | ++------+-----------------------------------+-----------------------------------+ +| 1 | ['a', 'b', 'c', 'd'] | 
['d', 'c', 'b', 'a'] | +| 2 | ['e', 'f', 'g', 'h'] | ['h', 'g', 'f', 'e'] | +| 3 | [NULL, 'a', NULL, 'b', NULL, 'c'] | ['c', NULL, 'b', NULL, 'a', NULL] | +| 4 | ['d', 'e', NULL, ' '] | [' ', NULL, 'e', 'd'] | +| 5 | [' ', NULL, 'f', 'g'] | ['g', 'f', NULL, ' '] | ++------+-----------------------------------+-----------------------------------+ +``` ### keywords -REVERSE +REVERSE, ARRAY diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 136c97a822..92b3bbe7db 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -324,6 +324,26 @@ visible_functions = [ [['array_slice', '%element_slice%'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], [['array_slice', '%element_slice%'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], [['array_slice', '%element_slice%'], 'ARRAY_STRING', ['ARRAY_STRING', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], + + # reverse function for string builtin + [['reverse'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], + # reverse function support the longtext + [['reverse'], 'STRING', ['STRING'], + '_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], + # reverse function support the array type + [['reverse'], 'ARRAY_TINYINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_SMALLINT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_INT', ['ARRAY_INT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_BIGINT', ['ARRAY_BIGINT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_LARGEINT', ['ARRAY_LARGEINT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_DATETIME', ['ARRAY_DATETIME'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_DATE', ['ARRAY_DATE'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_FLOAT', 
['ARRAY_FLOAT'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_DOUBLE', ['ARRAY_DOUBLE'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], '', '', '', 'vec', ''], + [['reverse'], 'ARRAY_STRING', ['ARRAY_STRING'], '', '', '', 'vec', ''], # Timestamp functions [['unix_timestamp'], 'INT', [], @@ -1914,8 +1934,6 @@ visible_functions = [ '_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['upper', 'ucase'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], - [['reverse'], 'VARCHAR', ['VARCHAR'], - '_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['trim'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions4trimEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['ltrim'], 'VARCHAR', ['VARCHAR'], @@ -2039,8 +2057,6 @@ visible_functions = [ '_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['upper', 'ucase'], 'STRING', ['STRING'], '_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], - [['reverse'], 'STRING', ['STRING'], - '_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['trim'], 'STRING', ['STRING'], '_ZN5doris15StringFunctions4trimEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['ltrim'], 'STRING', ['STRING'], diff --git a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out index 634ca8d9a8..c2883d4f9d 100644 --- a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out +++ 
b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out @@ -97,3 +97,13 @@ 5 [] 6 [1, 2] 7 [8, 9] + +-- !select -- +1 [3, 2, 1] ['', 'b', 'a'] [2, 1] +2 [4] \N [5] +3 [] [] \N +4 [1, 2, 3, 4, 5, 4, 3, 2, 1] [] [] +5 [] ['a', 'b', 'c', 'd', 'c', 'b', 'a'] \N +6 [1, 2, 3, 4, 5, 4, 3, 2, 1] ['a', 'b', 'c', 'd', 'c', 'b', 'a'] \N +7 [NULL, 10, NULL, 9, 8] ['h', NULL, 'g', NULL, 'f'] \N + diff --git a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy index 5852bf83e8..4797c2d5fa 100644 --- a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy @@ -51,9 +51,10 @@ suite("test_array_functions", "query") { qt_select "SELECT k1, array_distinct(k2), array_distinct(k3) FROM ${tableName} ORDER BY k1" qt_select "SELECT array_remove(k2, k1), k1 FROM ${tableName} ORDER BY k1" qt_select "SELECT k1, array_sort(k2), array_sort(k3), array_sort(k4) FROM ${tableName} ORDER BY k1" - qt_select "select k1, array_union(k2, k4) FROM ${tableName} ORDER BY k1" - qt_select "select k1, array_except(k2, k4) FROM ${tableName} ORDER BY k1" - qt_select "select k1, array_intersect(k2, k4) FROM ${tableName} ORDER BY k1" - qt_select "select k1, array_slice(k2, 2) FROM ${tableName} ORDER BY k1" - qt_select "select k1, array_slice(k2, 1, 2) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_union(k2, k4) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_except(k2, k4) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_intersect(k2, k4) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_slice(k2, 2) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_slice(k2, 1, 2) FROM ${tableName} ORDER BY k1" + qt_select "SELECT k1, reverse(k2), reverse(k3), reverse(k4) FROM ${tableName} ORDER 
BY k1" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org