This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 374303186c [Vectorized](function) support topn_array function (#13869) 374303186c is described below commit 374303186c996342c1c1bbbefd01e13a2cc5112f Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Wed Nov 2 19:49:23 2022 +0800 [Vectorized](function) support topn_array function (#13869) --- .../aggregate_function_topn.cpp | 61 ++++++++++ .../aggregate_functions/aggregate_function_topn.h | 133 +++++++++++++++++---- be/test/vec/aggregate_functions/agg_test.cpp | 2 +- .../aggregate_functions/vec_window_funnel_test.cpp | 7 +- .../aggregate-functions/topn_array.md | 61 ++++++++++ docs/sidebars.json | 1 + .../aggregate-functions/topn_array.md | 60 ++++++++++ .../java/org/apache/doris/catalog/FunctionSet.java | 7 ++ .../main/java/org/apache/doris/catalog/Type.java | 2 + .../test_select_stddev_variance_window.out | 132 ++++++++++---------- .../aggregate_functions/test_aggregate_collect.out | 12 ++ .../test_select_stddev_variance_window.out | 132 ++++++++++---------- .../test_select_stddev_variance_window.groovy | 12 +- .../test_aggregate_collect.groovy | 24 ++++ .../test_select_stddev_variance_window.groovy | 12 +- 15 files changed, 482 insertions(+), 176 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp index 19f52fbff8..74bb154d6a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp @@ -17,6 +17,8 @@ #include <vec/aggregate_functions/aggregate_function_topn.h> +#include "vec/aggregate_functions/helpers.h" +#include "vec/core/types.h" namespace doris::vectorized { AggregateFunctionPtr create_aggregate_function_topn(const std::string& name, @@ -38,8 +40,67 @@ AggregateFunctionPtr create_aggregate_function_topn(const std::string& name, return nullptr; } +template <bool has_default_param> +AggregateFunctionPtr create_topn_array(const DataTypes& argument_types) { + auto type = argument_types[0].get(); + if (type->is_nullable()) { + type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get(); + } + + WhichDataType which(*type); + +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) \ + return AggregateFunctionPtr( \ + new AggregateFunctionTopNArray< \ + AggregateFunctionTopNImplArray<TYPE, has_default_param>, TYPE>( \ + argument_types)); + FOR_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + if (which.is_string_or_fixed_string()) { + return AggregateFunctionPtr(new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray<std::string, has_default_param>, + std::string>(argument_types)); + } + if (which.is_decimal()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray<Decimal128, has_default_param>, Decimal128>( + argument_types)); + } + if (which.is_date_or_datetime() || which.is_date_time_v2()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray<Int64, has_default_param>, Int64>( + argument_types)); + } + if (which.is_date_v2()) { + return AggregateFunctionPtr( + new AggregateFunctionTopNArray< + AggregateFunctionTopNImplArray<UInt32, has_default_param>, UInt32>( + argument_types)); + } + + LOG(WARNING) << fmt::format("Illegal argument type for aggregate function topn_array is: {}", + type->get_name()); + return nullptr; +} + +AggregateFunctionPtr create_aggregate_function_topn_array(const std::string& name, + const DataTypes& argument_types, + const Array& parameters, + const bool result_is_nullable) { + bool has_default_param = (argument_types.size() == 3); + if (has_default_param) { + return create_topn_array<true>(argument_types); + } else { + return create_topn_array<false>(argument_types); + } +} + void register_aggregate_function_topn(AggregateFunctionSimpleFactory& factory) { factory.register_function("topn", create_aggregate_function_topn); + factory.register_function("topn_array", create_aggregate_function_topn_array); } } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.h b/be/src/vec/aggregate_functions/aggregate_function_topn.h index ae9fdf322d..b7aa6ae14c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_topn.h +++ b/be/src/vec/aggregate_functions/aggregate_function_topn.h @@ -21,28 +21,31 @@ #include <rapidjson/stringbuffer.h> #include <rapidjson/writer.h> +#include <type_traits> #include <unordered_map> #include "vec/aggregate_functions/aggregate_function.h" -#include "vec/aggregate_functions/aggregate_function_group_concat.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" -#include "vec/aggregate_functions/helpers.h" -#include "vec/data_types/data_type_date_time.h" -#include "vec/data_types/data_type_decimal.h" -#include "vec/data_types/data_type_number.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_string.h" #include "vec/io/io_helper.h" namespace doris::vectorized { // space-saving algorithm +template <typename T> struct AggregateFunctionTopNData { + using ColVecType = + std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, ColumnVector<T>>; void set_paramenters(int input_top_num, int space_expand_rate = 50) { top_num = input_top_num; capacity = (uint64_t)top_num * space_expand_rate; } - void add(const std::string& value) { + void add(const T& value) { auto it = counter_map.find(value); if (it != counter_map.end()) { it->second++; @@ -93,13 +96,13 @@ struct AggregateFunctionTopNData { } } - std::vector<std::pair<uint64_t, std::string>> get_remain_vector() const { - std::vector<std::pair<uint64_t, std::string>> counter_vector; + std::vector<std::pair<uint64_t, T>> get_remain_vector() const { + std::vector<std::pair<uint64_t, T>> counter_vector; for (auto it : counter_map) { counter_vector.emplace_back(it.second, it.first); } std::sort(counter_vector.begin(), counter_vector.end(), - std::greater<std::pair<uint64_t, std::string>>()); + std::greater<std::pair<uint64_t, T>>()); return counter_vector; } @@ -127,7 +130,7 @@ struct AggregateFunctionTopNData { read_binary(element_number, buf); counter_map.clear(); - std::pair<std::string, uint64_t> element; + std::pair<T, uint64_t> element; for (auto i = 0; i < element_number; i++) { read_binary(element.first, buf); read_binary(element.second, buf); @@ -152,11 +155,24 @@ struct AggregateFunctionTopNData { return buffer.GetString(); } + void insert_result_into(IColumn& to) const { + auto counter_vector = get_remain_vector(); + for (int i = 0; i < std::min((int)counter_vector.size(), top_num); i++) { + const auto& element = counter_vector[i]; + if constexpr (std::is_same_v<T, std::string>) { + static_cast<ColumnString&>(to).insert_data(element.second.c_str(), + element.second.length()); + } else { + static_cast<ColVecType&>(to).get_data().push_back(element.second); + } + } + } + void reset() { counter_map.clear(); } int top_num = 0; uint64_t capacity = 0; - phmap::flat_hash_map<std::string, uint64_t> counter_map; + phmap::flat_hash_map<T, uint64_t> counter_map; }; struct StringDataImplTopN { @@ -170,8 +186,8 @@ struct StringDataImplTopN { template <typename DataHelper> struct AggregateFunctionTopNImplInt { - static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, - size_t row_num) { + static void add(AggregateFunctionTopNData<std::string>& __restrict place, + const IColumn** columns, size_t row_num) { place.set_paramenters(static_cast<const ColumnInt32*>(columns[1])->get_element(row_num)); place.add(DataHelper::to_string(*columns[0], row_num)); } @@ -179,27 +195,49 @@ struct AggregateFunctionTopNImplInt { template <typename DataHelper> struct AggregateFunctionTopNImplIntInt { - static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, - size_t row_num) { + static void add(AggregateFunctionTopNData<std::string>& __restrict place, + const IColumn** columns, size_t row_num) { place.set_paramenters(static_cast<const ColumnInt32*>(columns[1])->get_element(row_num), static_cast<const ColumnInt32*>(columns[2])->get_element(row_num)); place.add(DataHelper::to_string(*columns[0], row_num)); } }; -//base function -template <typename Impl> -class AggregateFunctionTopN final - : public IAggregateFunctionDataHelper<AggregateFunctionTopNData, - AggregateFunctionTopN<Impl>> { -public: - AggregateFunctionTopN(const DataTypes& argument_types_) - : IAggregateFunctionDataHelper<AggregateFunctionTopNData, AggregateFunctionTopN<Impl>>( - argument_types_, {}) {} +template <typename T, bool has_default_param> +struct AggregateFunctionTopNImplArray { + using ColVecType = + std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, ColumnVector<T>>; + static void add(AggregateFunctionTopNData<T>& __restrict place, const IColumn** columns, + size_t row_num) { + if constexpr (has_default_param) { + place.set_paramenters( + static_cast<const ColumnInt32*>(columns[1])->get_element(row_num), + static_cast<const ColumnInt32*>(columns[2])->get_element(row_num)); - String get_name() const override { return "topn"; } + } else { + place.set_paramenters( + static_cast<const ColumnInt32*>(columns[1])->get_element(row_num)); + } + if constexpr (std::is_same_v<T, std::string>) { + StringRef ref = static_cast<const ColumnString&>(*columns[0]).get_data_at(row_num); + place.add(std::string(ref.data, ref.size)); + } else { + T val = static_cast<const ColVecType&>(*columns[0]).get_data()[row_num]; + place.add(val); + } + } +}; - DataTypePtr get_return_type() const override { return std::make_shared<DataTypeString>(); } +//base function +template <typename Impl, typename T> +class AggregateFunctionTopNBase + : public IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>, + AggregateFunctionTopNBase<Impl, T>> { +public: + AggregateFunctionTopNBase(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>, + AggregateFunctionTopNBase<Impl, T>>(argument_types_, + {}) {} void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, Arena*) const override { @@ -221,6 +259,18 @@ public: Arena*) const override { this->data(place).read(buf); } +}; + +//topn function return string +template <typename Impl, typename T = std::string> +class AggregateFunctionTopN final : public AggregateFunctionTopNBase<Impl, T> { +public: + AggregateFunctionTopN(const DataTypes& argument_types_) + : AggregateFunctionTopNBase<Impl, T>(argument_types_) {} + + String get_name() const override { return "topn"; } + + DataTypePtr get_return_type() const override { return std::make_shared<DataTypeString>(); } void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { std::string result = this->data(place).get(); @@ -228,4 +278,35 @@ public: } }; +//topn function return array +template <typename Impl, typename T> +class AggregateFunctionTopNArray final : public AggregateFunctionTopNBase<Impl, T> { +public: + AggregateFunctionTopNArray(const DataTypes& argument_types_) + : AggregateFunctionTopNBase<Impl, T>(argument_types_), + _argument_type(argument_types_[0]) {} + + String get_name() const override { return "topn_array"; } + + DataTypePtr get_return_type() const override { + return std::make_shared<DataTypeArray>(make_nullable(_argument_type)); + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + auto& to_arr = assert_cast<ColumnArray&>(to); + auto& to_nested_col = to_arr.get_data(); + if (to_nested_col.is_nullable()) { + auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col); + this->data(place).insert_result_into(col_null->get_nested_column()); + col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 0); + } else { + this->data(place).insert_result_into(to_nested_col); + } + to_arr.get_offsets().push_back(to_nested_col.size()); + } + +private: + DataTypePtr _argument_type; +}; + } // namespace doris::vectorized diff --git a/be/test/vec/aggregate_functions/agg_test.cpp b/be/test/vec/aggregate_functions/agg_test.cpp index b9840fb099..951b1b8e3b 100644 --- a/be/test/vec/aggregate_functions/agg_test.cpp +++ b/be/test/vec/aggregate_functions/agg_test.cpp @@ -89,7 +89,7 @@ TEST(AggTest, topn_test) { agg_function->add(place, const_cast<const IColumn**>(columns), i, nullptr); } - std::string result = reinterpret_cast<AggregateFunctionTopNData*>(place)->get(); + std::string result = reinterpret_cast<AggregateFunctionTopNData<std::string>*>(place)->get(); std::string expect_result = "{\"1\":2048,\"2\":683,\"3\":341,\"4\":205,\"5\":137,\"6\":97,\"7\":73,\"8\":57,\"9\":" "46,\"10\":37}"; diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp index 904d5ce80c..50bf612fd2 100644 --- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp +++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp @@ -17,16 +17,14 @@ #include <gtest/gtest.h> -#include "common/logging.h" #include "gtest/gtest.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" -#include "vec/aggregate_functions/aggregate_function_topn.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" - namespace doris::vectorized { void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory); @@ -43,8 +41,7 @@ public: std::make_shared<DataTypeInt64>(), std::make_shared<DataTypeString>(), std::make_shared<DataTypeDateTime>(), std::make_shared<DataTypeUInt8>(), std::make_shared<DataTypeUInt8>(), std::make_shared<DataTypeUInt8>(), - std::make_shared<DataTypeUInt8>(), - }; + std::make_shared<DataTypeUInt8>()}; Array array; agg_function = factory.get("window_funnel", data_types, array, false); EXPECT_NE(agg_function, nullptr); diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md new file mode 100644 index 0000000000..d266287638 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md @@ -0,0 +1,61 @@ +--- +{ + "title": "topn_array", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## topn_array +### description +#### Syntax + +`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])` + +The topn function uses the Space-Saving algorithm to calculate the top_num frequent items in expr, and the result is the +frequent items and their occurrence times, which is an approximation + +The space_expand_rate parameter is optional and is used to set the number of counters used in the Space-Saving algorithm +``` +counter numbers = top_num * space_expand_rate +``` +The higher value of space_expand_rate, the more accurate result will be. The default value is 50 + +### example +``` +mysql> select topn_array(k3,3) from baseall; ++--------------------------+ +| topn_array(`k3`, 3) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) + +mysql> select topn_array(k3,3,100) from baseall; ++--------------------------+ +| topn_array(`k3`, 3, 100) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) +``` +### keywords +TOPN, TOPN_ARRAY \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index 0333af1eeb..ebf2da3ec4 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -430,6 +430,7 @@ "sql-manual/sql-functions/aggregate-functions/percentile", "sql-manual/sql-functions/aggregate-functions/hll_union_agg", "sql-manual/sql-functions/aggregate-functions/topn", + "sql-manual/sql-functions/aggregate-functions/topn_array", "sql-manual/sql-functions/aggregate-functions/count", "sql-manual/sql-functions/aggregate-functions/sum", "sql-manual/sql-functions/aggregate-functions/max_by", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md new file mode 100644 index 0000000000..8b4985c25d --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md @@ -0,0 +1,60 @@ +--- +{ + "title": "topn_array", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## topn_array +### description +#### Syntax + +`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])` + +该topn_array函数使用Space-Saving算法计算expr中的top_num个频繁项,结果为频繁项及其出现次数,该结果为近似值 + +space_expand_rate参数是可选项,该值用来设置Space-Saving算法中使用的counter个数 +``` +counter numbers = top_num * space_expand_rate +``` +space_expand_rate的值越大,结果越准确,默认值为50 + +### example +``` +mysql> select topn_array(k3,3) from baseall; ++--------------------------+ +| topn_array(`k3`, 3) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) + +mysql> select topn_array(k3,3,100) from baseall; ++--------------------------+ +| topn_array(`k3`, 3, 100) | ++--------------------------+ +| [3021, 2147483647, 5014] | ++--------------------------+ +1 row in set (0.02 sec) +``` +### keywords +TOPN, TOPN_ARRAY diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 6f9154cac8..acbb0a5a18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -2466,6 +2466,13 @@ public class FunctionSet<T> { "", "", "", "", "", true, false, true, true)); addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, Lists.newArrayList(t), new ArrayType(t), t, "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction.createBuiltin("topn_array", Lists.newArrayList(t, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); + addBuiltin( + AggregateFunction + .createBuiltin("topn_array", Lists.newArrayList(t, Type.INT, Type.INT), new ArrayType(t), t, + "", "", "", "", "", true, false, true, true)); } // Avg diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 3f6fd5ef0e..be8217e42b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -158,6 +158,8 @@ public abstract class Type { arraySubTypes.add(DECIMALV2); arraySubTypes.add(DATE); arraySubTypes.add(DATETIME); + arraySubTypes.add(DATEV2); + arraySubTypes.add(DATETIMEV2); arraySubTypes.add(CHAR); arraySubTypes.add(VARCHAR); arraySubTypes.add(STRING); diff --git a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out index b3a9ee95e5..24fd821c9a 100644 --- a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out +++ b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out @@ -918,55 +918,55 @@ 15 1991.8000000000002 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 1991.0 -4 1991.0 -5 5069.4970703125 -6 5069.4970703125 -7 1991.0 -8 5069.4970703125 -9 11224.505859375 -10 5068.5966796875 -11 17379.5 -12 11223.8056640625 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1989.0 +2 1987.5 +3 1987.5 +4 1987.5 +5 1989.0 +6 1989.0 +7 1987.5 +8 1989.0 +9 1990.0 +10 1989.0 +11 1991.0 +12 1123.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 \N 2 \N 3 1986.0 -4 1989.0 +4 1987.5 5 1989.0 -6 1989.0 -7 1990.7999267578125 -8 29689.203125 -9 29688.802734375 -10 1990.7999267578125 -11 29689.40234375 +6 1987.0 +7 1989.0 +8 1989.0 +9 1985.0 +10 1989.0 +11 1991.0 12 1991.0 -13 29689.40234375 -14 29689.40234375 -15 1990.7999267578125 +13 1991.0 +14 1991.0 +15 1989.0 -- !select_default -- -1 1989.0 -2 1989.0 -3 1990.7999267578125 -4 1990.39990234375 -5 29689.203125 -6 23533.6015625 -7 1991.0 -8 17379.0 -9 17379.0 -10 17379.0 -11 17379.5 -12 17379.0 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1987.0 +2 1987.5 +3 1989.0 +4 1987.5 +5 1989.0 +6 1987.0 +7 1989.0 +8 1989.0 +9 1989.0 +10 1991.0 +11 1991.0 +12 1991.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 1989.0 @@ -986,36 +986,36 @@ 15 1992.0 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 5068.5966796875 -4 11223.8056640625 -5 11224.505859375 -6 17379.5 -7 17379.0 -8 1991.7000732421875 -9 1991.9000244140625 -10 23534.203125 -11 1992.0 -12 29515.802734375 -13 255.0 +1 1989.0 +2 1987.5 +3 1989.0 +4 1123.0 +5 1990.0 +6 1991.0 +7 255.0 +8 1990.0 +9 1991.0 +10 1123.0 +11 1990.5 +12 255.0 +13 -16256.0 14 255.0 15 1992.0 -- !select_default -- 1 1989.0 2 1986.0 -3 1989.0 -4 1990.7999267578125 -5 1989.0 -6 29689.203125 -7 1990.39990234375 -8 23533.6015625 -9 17379.0 -10 1991.0 -11 11223.8056640625 -12 11223.8056640625 -13 5068.5966796875 -14 1991.0 -15 5069.4970703125 +3 1987.5 +4 1989.0 +5 1987.0 +6 1989.0 +7 1987.5 +8 1987.0 +9 1989.0 +10 1989.0 +11 1989.0 +12 1990.0 +13 1989.0 +14 1987.5 +15 1989.0 diff --git a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out index d501683f7f..f26cf1146b 100644 --- a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out +++ b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out @@ -16,3 +16,15 @@ -- !select -- 1 [1, 2, 1, 2] ['hello'] [2022-07-04, 2022-07-04] [1.23] ['hello', 'hello', 'hello', 'hello'] +-- !select43 -- +[10, 8] + +-- !select44 -- +[10, 8] + +-- !select45 -- +[2022-11-02, 2022-11-01] + +-- !select46 -- +[6.8754576, 0.576] + diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out index b3a9ee95e5..24fd821c9a 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out @@ -918,55 +918,55 @@ 15 1991.8000000000002 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 1991.0 -4 1991.0 -5 5069.4970703125 -6 5069.4970703125 -7 1991.0 -8 5069.4970703125 -9 11224.505859375 -10 5068.5966796875 -11 17379.5 -12 11223.8056640625 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1989.0 +2 1987.5 +3 1987.5 +4 1987.5 +5 1989.0 +6 1989.0 +7 1987.5 +8 1989.0 +9 1990.0 +10 1989.0 +11 1991.0 +12 1123.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 \N 2 \N 3 1986.0 -4 1989.0 +4 1987.5 5 1989.0 -6 1989.0 -7 1990.7999267578125 -8 29689.203125 -9 29688.802734375 -10 1990.7999267578125 -11 29689.40234375 +6 1987.0 +7 1989.0 +8 1989.0 +9 1985.0 +10 1989.0 +11 1991.0 12 1991.0 -13 29689.40234375 -14 29689.40234375 -15 1990.7999267578125 +13 1991.0 +14 1991.0 +15 1989.0 -- !select_default -- -1 1989.0 -2 1989.0 -3 1990.7999267578125 -4 1990.39990234375 -5 29689.203125 -6 23533.6015625 -7 1991.0 -8 17379.0 -9 17379.0 -10 17379.0 -11 17379.5 -12 17379.0 -13 17379.0 -14 23534.203125 -15 1991.7000732421875 +1 1987.0 +2 1987.5 +3 1989.0 +4 1987.5 +5 1989.0 +6 1987.0 +7 1989.0 +8 1989.0 +9 1989.0 +10 1991.0 +11 1991.0 +12 1991.0 +13 255.0 +14 1123.0 +15 1990.0 -- !select_default -- 1 1989.0 @@ -986,36 +986,36 @@ 15 1992.0 -- !select_default -- -1 5069.4970703125 -2 1991.0 -3 5068.5966796875 -4 11223.8056640625 -5 11224.505859375 -6 17379.5 -7 17379.0 -8 1991.7000732421875 -9 1991.9000244140625 -10 23534.203125 -11 1992.0 -12 29515.802734375 -13 255.0 +1 1989.0 +2 1987.5 +3 1989.0 +4 1123.0 +5 1990.0 +6 1991.0 +7 255.0 +8 1990.0 +9 1991.0 +10 1123.0 +11 1990.5 +12 255.0 +13 -16256.0 14 255.0 15 1992.0 -- !select_default -- 1 1989.0 2 1986.0 -3 1989.0 -4 1990.7999267578125 -5 1989.0 -6 29689.203125 -7 1990.39990234375 -8 23533.6015625 -9 17379.0 -10 1991.0 -11 11223.8056640625 -12 11223.8056640625 -13 5068.5966796875 -14 1991.0 -15 5069.4970703125 +3 1987.5 +4 1989.0 +5 1987.0 +6 1989.0 +7 1987.5 +8 1987.0 +9 1989.0 +10 1989.0 +11 1989.0 +12 1990.0 +13 1989.0 +14 1987.5 +15 1989.0 diff --git a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy index 390418ddcf..311ae350c8 100644 --- a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy +++ b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy @@ -144,12 +144,12 @@ suite("test_select_stddev_variance_window") { qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" } diff --git a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy index cd7e61d408..a53e795064 100644 --- a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy +++ b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy @@ -47,4 +47,28 @@ suite("test_aggregate_collect") { sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS SELECT 1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null) FROM ${tableName} """ qt_select "SELECT * from ${tableCTAS}" + + // topn_array + def tableName_12 = "topn_array" + + sql "DROP TABLE IF EXISTS ${tableName_12}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName_12} ( + id int, + level int, + dt datev2, + num decimal(27,9) + ) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ) + """ + sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576), (2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576) ,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)" + + qt_select43 "select topn_array(level,2) from ${tableName_12}" + qt_select44 "select topn_array(level,2,100) from ${tableName_12}" + qt_select45 "select topn_array(dt,2,100) from ${tableName_12}" + qt_select46 "select topn_array(num,2,100) from ${tableName_12}" + sql "DROP TABLE IF EXISTS ${tableName_12}" } diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy index b81f400dad..fb319a4192 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy @@ -147,12 +147,12 @@ suite("test_select_stddev_variance_window") { qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" qt_select_default "select k1, percentile(k2,0.8) over (partition by k6 order by k1) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" - qt_select_default "select k1, percentile_approx(k2,0.8,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 preceding) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between 3 preceding and 1 following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and current row) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1 rows between current row and unbounded following) from ${tableName} order by k1;" + qt_select_default "select k1, percentile_approx(k2,0.5,4096) over (partition by k6 order by k1) from ${tableName} order by k1;" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org