TangSiyang2001 commented on code in PR #15339:
URL: https://github.com/apache/doris/pull/15339#discussion_r1112110316


##########
be/src/vec/aggregate_functions/aggregate_function_collect.h:
##########
@@ -31,122 +33,180 @@
 
 namespace doris::vectorized {
 
-template <typename T>
+template <typename T, typename HasLimit>
 struct AggregateFunctionCollectSetData {
     using ElementType = T;
     using ColVecType = ColumnVectorOrDecimal<ElementType>;
     using ElementNativeType = typename NativeType<T>::Type;
+    using SelfType = AggregateFunctionCollectSetData;
     using Set = HashSetWithStackMemory<ElementNativeType, DefaultHash<ElementNativeType>, 4>;
-    Set set;
+    Set data_set;
+    UInt64 max_size;
+
+    size_t size() const { return data_set.size(); }
 
     void add(const IColumn& column, size_t row_num) {
-        const auto& vec = assert_cast<const ColVecType&>(column).get_data();
-        set.insert(vec[row_num]);
+        data_set.insert(assert_cast<const ColVecType&>(column).get_data()[row_num]);
+    }
+
+    void merge(const SelfType& rhs) {
+        if constexpr (HasLimit::value) {
+            for (auto& rhs_elem : rhs.data_set) {
+                if (size() >= max_size) {
+                    return;
+                }
+                data_set.insert(rhs_elem.get_value());
+            }
+        } else {
+            data_set.merge(rhs.data_set);
+        }
     }
-    void merge(const AggregateFunctionCollectSetData& rhs) { set.merge(rhs.set); }
-    void write(BufferWritable& buf) const { set.write(buf); }
-    void read(BufferReadable& buf) { set.read(buf); }
-    void reset() { set.clear(); }
+
+    void write(BufferWritable& buf) const { data_set.write(buf); }
+
+    void read(BufferReadable& buf) { data_set.read(buf); }
+
     void insert_result_into(IColumn& to) const {
         auto& vec = assert_cast<ColVecType&>(to).get_data();
-        vec.reserve(set.size());
-        for (auto item : set) {
+        vec.reserve(size());
+        for (auto item : data_set) {
             vec.push_back(item.key);
         }
     }
+
+    void reset() { data_set.clear(); }
 };
 
-template <>
-struct AggregateFunctionCollectSetData<StringRef> {
+template <typename HasLimit>
+struct AggregateFunctionCollectSetData<StringRef, HasLimit> {
     using ElementType = StringRef;
     using ColVecType = ColumnString;
+    using SelfType = AggregateFunctionCollectSetData<ElementType, HasLimit>;
     using Set = HashSetWithSavedHashWithStackMemory<ElementType, DefaultHash<ElementType>, 4>;
-    Set set;
+    Set data_set;
+    UInt64 max_size;

Review Comment:
   OK, I will improve it that way.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to