This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f17551fb04 [improve](function) support collect_list with nested types 
param (#47965)
1f17551fb04 is described below

commit 1f17551fb044d99dbfca25f417841fad75786315
Author: amory <wangqian...@selectdb.com>
AuthorDate: Thu Feb 20 10:31:22 2025 +0800

    [improve](function) support collect_list with nested types param (#47965)
    
    Before this change, collect_list did not support array/map/struct
    arguments; a query using one failed with:
    ```
    mysql>  SELECT id, collect_list(kastr) FROM test_array_agg_complex GROUP BY 
id ORDER BY id;
    ERROR 1105 (HY000): errCode = 2, detailMessage = 
(172.21.16.12)[INTERNAL_ERROR]Agg Function collect_list(array<text>) is not 
implemented
    ```
    After this change, collect_list accepts array/map/struct parameters.
---
 .../aggregate_function_collect.cpp                 |   8 +++
 .../aggregate_function_collect.h                   |  70 ++++++++++++++++++++-
 .../data/query_p0/aggregate/array_agg.out          | Bin 8132 -> 37278 bytes
 .../suites/query_p0/aggregate/array_agg.groovy     |  16 +++++
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index 15806c739ed..c1abefec218 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -49,6 +49,11 @@ AggregateFunctionPtr do_create_agg_function_collect(bool 
distinct, const DataTyp
                     AggregateFunctionCollectListData<T, HasLimit>, HasLimit, 
std::false_type>>(
                     argument_types, result_is_nullable);
         }
+    } else if (!distinct) {
+        // void type means support array/map/struct type for collect_list
+        return creator_without_type::create<AggregateFunctionCollect<
+                AggregateFunctionCollectListData<void, HasLimit>, HasLimit, 
std::false_type>>(
+                argument_types, result_is_nullable);
     }
     return nullptr;
 }
@@ -93,6 +98,9 @@ AggregateFunctionPtr 
create_aggregate_function_collect_impl(const std::string& n
         if constexpr (ShowNull::value) {
             return do_create_agg_function_collect<void, HasLimit, ShowNull>(
                     distinct, argument_types, result_is_nullable);
+        } else {
+            return do_create_agg_function_collect<void, HasLimit, ShowNull>(
+                    distinct, argument_types, result_is_nullable);
         }
     }
 
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h 
b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index 1b4eadf259d..755458d662a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -194,6 +194,9 @@ struct AggregateFunctionCollectListData {
     PaddedPODArray<ElementType> data;
     Int64 max_size = -1;
 
+    AggregateFunctionCollectListData() {}
+    AggregateFunctionCollectListData(const DataTypes& argument_types) {}
+
     size_t size() const { return data.size(); }
 
     void add(const IColumn& column, size_t row_num) {
@@ -306,6 +309,67 @@ struct AggregateFunctionCollectListData<StringRef, 
HasLimit> {
     }
 };
 
+template <typename HasLimit>
+struct AggregateFunctionCollectListData<void, HasLimit> {
+    using ElementType = StringRef;
+    using Self = AggregateFunctionCollectListData<void, HasLimit>;
+    MutableColumnPtr column_data;
+    Int64 max_size = -1;
+
+    AggregateFunctionCollectListData() {}
+    AggregateFunctionCollectListData(const DataTypes& argument_types) {
+        DataTypePtr column_type = argument_types[0];
+        column_data = column_type->create_column();
+    }
+
+    size_t size() const { return column_data->size(); }
+
+    void add(const IColumn& column, size_t row_num) { 
column_data->insert_from(column, row_num); }
+
+    void merge(const AggregateFunctionCollectListData& rhs) {
+        if constexpr (HasLimit::value) {
+            if (max_size == -1) {
+                max_size = rhs.max_size;
+            }
+            max_size = rhs.max_size;
+
+            column_data->insert_range_from(
+                    *rhs.column_data, 0,
+                    std::min(assert_cast<size_t, TypeCheckOnRelease::DISABLE>(
+                                     static_cast<size_t>(max_size - size())),
+                             rhs.size()));
+        } else {
+            column_data->insert_range_from(*rhs.column_data, 0, rhs.size());
+        }
+    }
+
+    void write(BufferWritable& buf) const {
+        const size_t size = column_data->size();
+        write_binary(size, buf);
+        for (size_t i = 0; i < size; i++) {
+            write_string_binary(column_data->get_data_at(i), buf);
+        }
+        write_var_int(max_size, buf);
+    }
+
+    void read(BufferReadable& buf) {
+        size_t size = 0;
+        read_binary(size, buf);
+        column_data->reserve(size);
+
+        StringRef s;
+        for (size_t i = 0; i < size; i++) {
+            read_string_binary(s, buf);
+            column_data->insert_data(s.data, s.size);
+        }
+        read_var_int(max_size, buf);
+    }
+
+    void reset() { column_data->clear(); }
+
+    void insert_result_into(IColumn& to) const { 
to.insert_range_from(*column_data, 0, size()); }
+};
+
 template <typename T>
 struct AggregateFunctionArrayAggData {
     using ElementType = T;
@@ -623,7 +687,11 @@ public:
                 new (place) Data();
             }
         } else {
-            new (place) Data();
+            if constexpr (std::is_same_v<Data, 
AggregateFunctionCollectListData<void, HasLimit>>) {
+                new (place) Data(argument_types);
+            } else {
+                new (place) Data();
+            }
         }
     }
 
diff --git a/regression-test/data/query_p0/aggregate/array_agg.out 
b/regression-test/data/query_p0/aggregate/array_agg.out
index 62ffb5fcf47..4bdf7671da5 100644
Binary files a/regression-test/data/query_p0/aggregate/array_agg.out and 
b/regression-test/data/query_p0/aggregate/array_agg.out differ
diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy 
b/regression-test/suites/query_p0/aggregate/array_agg.groovy
index 42fb3b131a4..6342baa8212 100644
--- a/regression-test/suites/query_p0/aggregate/array_agg.groovy
+++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy
@@ -276,6 +276,22 @@ suite("array_agg") {
     order_qt_sql_array_agg_array """ SELECT id, array_agg(kastr) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
     order_qt_sql_array_agg_map """ SELECT id, array_agg(km) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
     order_qt_sql_array_agg_struct """ SELECT id, array_agg(ks) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_collect_list_array """ SELECT id, collect_list(kastr) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_collect_list_map """ SELECT id, collect_list(km) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_collect_list_struct """ SELECT id, collect_list(ks) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_group_array_array """ SELECT group_array(kastr) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_group_array_map """ SELECT group_array(km) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_group_array_struct """ SELECT group_array(ks) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    // add limit for param
+    order_qt_sql_array_agg_array_limit """ SELECT id, array_agg(kastr) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_array_agg_map_limit """ SELECT id, array_agg(km) FROM 
test_array_agg_complex GROUP BY id ORDER BY id """
+    order_qt_sql_array_agg_struct_limit """ SELECT id, array_agg(ks) FROM 
test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_collect_list_array_limit """ SELECT id, collect_list(kastr, 
2) FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_collect_list_map_limit """ SELECT id, collect_list(km, 2) 
FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_collect_list_struct_limit """ SELECT id, collect_list(ks, 3) 
FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_group_array_array_limit """ SELECT group_array(kastr, 3) FROM 
test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_group_array_map_limit """ SELECT group_array(km, 7) FROM 
test_array_agg_complex GROUP BY id ORDER BY id"""
+    order_qt_sql_group_array_struct_limit """ SELECT group_array(ks, 7) FROM 
test_array_agg_complex GROUP BY id ORDER BY id"""
 
 
  sql """ DROP TABLE IF EXISTS test_array_agg_ip;"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to