This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1f17551fb04 [improve](function) support collect_list with nested types param (#47965) 1f17551fb04 is described below commit 1f17551fb044d99dbfca25f417841fad75786315 Author: amory <wangqian...@selectdb.com> AuthorDate: Thu Feb 20 10:31:22 2025 +0800 [improve](function) support collect_list with nested types param (#47965) Previously, collect_list did not support array/map/struct arguments; such a query failed with: ``` mysql> SELECT id, collect_list(kastr) FROM test_array_agg_complex GROUP BY id ORDER BY id; ERROR 1105 (HY000): errCode = 2, detailMessage = (172.21.16.12)[INTERNAL_ERROR]Agg Function collect_list(array<text>) is not implemented ``` After this change, collect_list can be used with array/map/struct parameters. --- .../aggregate_function_collect.cpp | 8 +++ .../aggregate_function_collect.h | 70 ++++++++++++++++++++- .../data/query_p0/aggregate/array_agg.out | Bin 8132 -> 37278 bytes .../suites/query_p0/aggregate/array_agg.groovy | 16 +++++ 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp index 15806c739ed..c1abefec218 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp @@ -49,6 +49,11 @@ AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const DataTyp AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>( argument_types, result_is_nullable); } + } else if (!distinct) { + // void type means support array/map/struct type for collect_list + return creator_without_type::create<AggregateFunctionCollect< + AggregateFunctionCollectListData<void, HasLimit>, HasLimit, std::false_type>>( + argument_types, result_is_nullable); } return nullptr; } @@ -93,6 +98,9 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n if constexpr (ShowNull::value) { 
return do_create_agg_function_collect<void, HasLimit, ShowNull>( distinct, argument_types, result_is_nullable); + } else { + return do_create_agg_function_collect<void, HasLimit, ShowNull>( + distinct, argument_types, result_is_nullable); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index 1b4eadf259d..755458d662a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -194,6 +194,9 @@ struct AggregateFunctionCollectListData { PaddedPODArray<ElementType> data; Int64 max_size = -1; + AggregateFunctionCollectListData() {} + AggregateFunctionCollectListData(const DataTypes& argument_types) {} + size_t size() const { return data.size(); } void add(const IColumn& column, size_t row_num) { @@ -306,6 +309,67 @@ struct AggregateFunctionCollectListData<StringRef, HasLimit> { } }; +template <typename HasLimit> +struct AggregateFunctionCollectListData<void, HasLimit> { + using ElementType = StringRef; + using Self = AggregateFunctionCollectListData<void, HasLimit>; + MutableColumnPtr column_data; + Int64 max_size = -1; + + AggregateFunctionCollectListData() {} + AggregateFunctionCollectListData(const DataTypes& argument_types) { + DataTypePtr column_type = argument_types[0]; + column_data = column_type->create_column(); + } + + size_t size() const { return column_data->size(); } + + void add(const IColumn& column, size_t row_num) { column_data->insert_from(column, row_num); } + + void merge(const AggregateFunctionCollectListData& rhs) { + if constexpr (HasLimit::value) { + if (max_size == -1) { + max_size = rhs.max_size; + } + max_size = rhs.max_size; + + column_data->insert_range_from( + *rhs.column_data, 0, + std::min(assert_cast<size_t, TypeCheckOnRelease::DISABLE>( + static_cast<size_t>(max_size - size())), + rhs.size())); + } else { + column_data->insert_range_from(*rhs.column_data, 0, 
rhs.size()); + } + } + + void write(BufferWritable& buf) const { + const size_t size = column_data->size(); + write_binary(size, buf); + for (size_t i = 0; i < size; i++) { + write_string_binary(column_data->get_data_at(i), buf); + } + write_var_int(max_size, buf); + } + + void read(BufferReadable& buf) { + size_t size = 0; + read_binary(size, buf); + column_data->reserve(size); + + StringRef s; + for (size_t i = 0; i < size; i++) { + read_string_binary(s, buf); + column_data->insert_data(s.data, s.size); + } + read_var_int(max_size, buf); + } + + void reset() { column_data->clear(); } + + void insert_result_into(IColumn& to) const { to.insert_range_from(*column_data, 0, size()); } +}; + template <typename T> struct AggregateFunctionArrayAggData { using ElementType = T; @@ -623,7 +687,11 @@ public: new (place) Data(); } } else { - new (place) Data(); + if constexpr (std::is_same_v<Data, AggregateFunctionCollectListData<void, HasLimit>>) { + new (place) Data(argument_types); + } else { + new (place) Data(); + } } } diff --git a/regression-test/data/query_p0/aggregate/array_agg.out b/regression-test/data/query_p0/aggregate/array_agg.out index 62ffb5fcf47..4bdf7671da5 100644 Binary files a/regression-test/data/query_p0/aggregate/array_agg.out and b/regression-test/data/query_p0/aggregate/array_agg.out differ diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy b/regression-test/suites/query_p0/aggregate/array_agg.groovy index 42fb3b131a4..6342baa8212 100644 --- a/regression-test/suites/query_p0/aggregate/array_agg.groovy +++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy @@ -276,6 +276,22 @@ suite("array_agg") { order_qt_sql_array_agg_array """ SELECT id, array_agg(kastr) FROM test_array_agg_complex GROUP BY id ORDER BY id """ order_qt_sql_array_agg_map """ SELECT id, array_agg(km) FROM test_array_agg_complex GROUP BY id ORDER BY id """ order_qt_sql_array_agg_struct """ SELECT id, array_agg(ks) FROM test_array_agg_complex GROUP BY id 
ORDER BY id """ + order_qt_sql_collect_list_array """ SELECT id, collect_list(kastr) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_collect_list_map """ SELECT id, collect_list(km) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_collect_list_struct """ SELECT id, collect_list(ks) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_group_array_array """ SELECT group_array(kastr) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_group_array_map """ SELECT group_array(km) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_group_array_struct """ SELECT group_array(ks) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + // add limit for param + order_qt_sql_array_agg_array_limit """ SELECT id, array_agg(kastr) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_array_agg_map_limit """ SELECT id, array_agg(km) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + order_qt_sql_array_agg_struct_limit """ SELECT id, array_agg(ks) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_collect_list_array_limit """ SELECT id, collect_list(kastr, 2) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_collect_list_map_limit """ SELECT id, collect_list(km, 2) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_collect_list_struct_limit """ SELECT id, collect_list(ks, 3) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_group_array_array_limit """ SELECT group_array(kastr, 3) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_group_array_map_limit """ SELECT group_array(km, 7) FROM test_array_agg_complex GROUP BY id ORDER BY id""" + order_qt_sql_group_array_struct_limit """ SELECT group_array(ks, 7) FROM test_array_agg_complex GROUP BY id ORDER BY id""" sql """ DROP TABLE IF EXISTS test_array_agg_ip;""" 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org