This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9451382428 [Improvement](aggregate) optimization for AggregationMethodKeysFixed::insert_keys_into_columns (#22216)
9451382428 is described below

commit 9451382428cdb6cb55b48b7d2e1941c0fbe72701
Author: Pxl <pxl...@qq.com>
AuthorDate: Wed Jul 26 16:19:15 2023 +0800

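    [Improvement](aggregate) optimization for AggregationMethodKeysFixed::insert_keys_into_columns (#22216)
    
    Optimization for AggregationMethodKeysFixed::insert_keys_into_columns:
    
    - insert_keys_into_columns now resizes each key column to num_rows once
      and copies the key bytes column by column with memcpy, instead of
      calling insert_key_into_columns once per row.
    - pack_fixed now advances the offset by key_sizes[j] for null keys as
      well, so every key column sits at a fixed position inside the packed key.
    - The per-row insert_key_into_columns helpers are removed from all
      aggregation methods.
    - ColumnVector::insert_many_fix_len_data no longer takes the copy-way
      branch for non-Int64 types.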
---
 be/src/vec/columns/column_vector.h     |   4 +-
 be/src/vec/common/aggregation_common.h |  24 +++----
 be/src/vec/exec/vaggregation_node.h    | 111 ++++++++++-----------------------
 3 files changed, 46 insertions(+), 93 deletions(-)

diff --git a/be/src/vec/columns/column_vector.h 
b/be/src/vec/columns/column_vector.h
index b8c119a217..04908d8711 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -227,9 +227,7 @@ public:
         use by date, datetime, basic type
     */
     void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
-        if constexpr (!std::is_same_v<T, vectorized::Int64>) {
-            insert_many_in_copy_way(data_ptr, num);
-        } else if (IColumn::is_date) {
+        if (IColumn::is_date) {
             insert_date_column(data_ptr, num);
         } else if (IColumn::is_date_time) {
             insert_datetime_column(data_ptr, num);
diff --git a/be/src/vec/common/aggregation_common.h 
b/be/src/vec/common/aggregation_common.h
index 39beb8b25f..31f19e7418 100644
--- a/be/src/vec/common/aggregation_common.h
+++ b/be/src/vec/common/aggregation_common.h
@@ -166,17 +166,18 @@ T pack_fixed(size_t i, size_t keys_size, const 
ColumnRawPtrs& key_columns, const
     }
 
     for (size_t j = 0; j < keys_size; ++j) {
-        bool is_null;
+        bool is_null = false;
 
-        if (!has_bitmap)
-            is_null = false;
-        else {
+        if (has_bitmap) {
             size_t bucket = j / 8;
             size_t off = j % 8;
             is_null = ((bitmap[bucket] >> off) & 1) == 1;
         }
 
-        if (is_null) continue;
+        if (is_null) {
+            offset += key_sizes[j];
+            continue;
+        }
 
         switch (key_sizes[j]) {
         case 1:
@@ -184,28 +185,24 @@ T pack_fixed(size_t i, size_t keys_size, const 
ColumnRawPtrs& key_columns, const
                    static_cast<const 
ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<1>() +
                            i,
                    1);
-            offset += 1;
             break;
         case 2:
             memcpy(bytes + offset,
                    static_cast<const 
ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<2>() +
                            i * 2,
                    2);
-            offset += 2;
             break;
         case 4:
             memcpy(bytes + offset,
                    static_cast<const 
ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<4>() +
                            i * 4,
                    4);
-            offset += 4;
             break;
         case 8:
             memcpy(bytes + offset,
                    static_cast<const 
ColumnVectorHelper*>(key_columns[j])->get_raw_data_begin<8>() +
                            i * 8,
                    8);
-            offset += 8;
             break;
         default:
             memcpy(bytes + offset,
@@ -214,6 +211,8 @@ T pack_fixed(size_t i, size_t keys_size, const 
ColumnRawPtrs& key_columns, const
                    key_sizes[j]);
             offset += key_sizes[j];
         }
+
+        offset += key_sizes[j];
     }
 
     return key;
@@ -224,7 +223,9 @@ inline UInt128 hash128(size_t i, size_t keys_size, const 
ColumnRawPtrs& key_colu
     UInt128 key;
     SipHash hash;
 
-    for (size_t j = 0; j < keys_size; ++j) 
key_columns[j]->update_hash_with_value(i, hash);
+    for (size_t j = 0; j < keys_size; ++j) {
+        key_columns[j]->update_hash_with_value(i, hash);
+    }
 
     hash.get128(key.low, key.high);
 
@@ -253,8 +254,9 @@ inline StringRef serialize_keys_to_pool_contiguous(size_t 
i, size_t keys_size,
     const char* begin = nullptr;
 
     size_t sum_size = 0;
-    for (size_t j = 0; j < keys_size; ++j)
+    for (size_t j = 0; j < keys_size; ++j) {
         sum_size += key_columns[j]->serialize_value_into_arena(i, pool, 
begin).size;
+    }
 
     return {begin, sum_size};
 }
diff --git a/be/src/vec/exec/vaggregation_node.h 
b/be/src/vec/exec/vaggregation_node.h
index 9d6f4c4979..e31240cdbc 100644
--- a/be/src/vec/exec/vaggregation_node.h
+++ b/be/src/vec/exec/vaggregation_node.h
@@ -154,14 +154,6 @@ struct AggregationMethodSerialized {
         return max_one_row_byte_size;
     }
 
-    static void insert_key_into_columns(const StringRef& key, MutableColumns& 
key_columns,
-                                        const Sizes&) {
-        auto pos = key.data;
-        for (auto& column : key_columns) {
-            pos = column->deserialize_and_insert_from_arena(pos);
-        }
-    }
-
     static void insert_keys_into_columns(std::vector<StringRef>& keys, 
MutableColumns& key_columns,
                                          const size_t num_rows, const Sizes&) {
         for (auto& column : key_columns) {
@@ -215,11 +207,6 @@ struct AggregationMethodStringNoCache {
 
     static const bool low_cardinality_optimization = false;
 
-    static void insert_key_into_columns(const StringRef& key, MutableColumns& 
key_columns,
-                                        const Sizes&) {
-        key_columns[0]->insert_data(key.data, key.size);
-    }
-
     static void insert_keys_into_columns(std::vector<StringRef>& keys, 
MutableColumns& key_columns,
                                          const size_t num_rows, const Sizes&) {
         key_columns[0]->reserve(num_rows);
@@ -256,14 +243,6 @@ struct AggregationMethodOneNumber {
     using State = ColumnsHashing::HashMethodOneNumber<typename 
Data::value_type, Mapped, FieldType,
                                                       
consecutive_keys_optimization>;
 
-    // Insert the key from the hash table into columns.
-    static void insert_key_into_columns(const Key& key, MutableColumns& 
key_columns,
-                                        const Sizes& /*key_sizes*/) {
-        const auto* key_holder = reinterpret_cast<const char*>(&key);
-        auto* column = static_cast<ColumnVectorHelper*>(key_columns[0].get());
-        column->insert_raw_data<sizeof(FieldType)>(key_holder);
-    }
-
     static void insert_keys_into_columns(std::vector<Key>& keys, 
MutableColumns& key_columns,
                                          const size_t num_rows, const Sizes&) {
         key_columns[0]->reserve(num_rows);
@@ -328,59 +307,44 @@ struct AggregationMethodKeysFixed {
     using State = ColumnsHashing::HashMethodKeysFixed<typename 
Data::value_type, Key, Mapped,
                                                       has_nullable_keys, 
false>;
 
-    static void insert_key_into_columns(const Key& key, MutableColumns& 
key_columns,
-                                        const Sizes& key_sizes) {
-        size_t keys_size = key_columns.size();
-
-        static constexpr auto bitmap_size =
-                has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 
0;
-        /// In any hash key value, column values to be read start just after 
the bitmap, if it exists.
-        size_t pos = bitmap_size;
-
-        for (size_t i = 0; i < keys_size; ++i) {
-            IColumn* observed_column;
-            ColumnUInt8* null_map;
-
-            bool column_nullable = false;
-            if constexpr (has_nullable_keys) {
-                column_nullable = is_column_nullable(*key_columns[i]);
-            }
-
-            /// If we have a nullable column, get its nested column and its 
null map.
-            if (column_nullable) {
+    static void insert_keys_into_columns(std::vector<Key>& keys, 
MutableColumns& key_columns,
+                                         const size_t num_rows, const Sizes& 
key_sizes) {
+        // In any hash key value, column values to be read start just after 
the bitmap, if it exists.
+        size_t pos = has_nullable_keys ? 
std::tuple_size<KeysNullMap<Key>>::value : 0;
+
+        for (size_t i = 0; i < key_columns.size(); ++i) {
+            size_t size = key_sizes[i];
+            key_columns[i]->resize(num_rows);
+            // If we have a nullable column, get its nested column and its 
null map.
+            if (is_column_nullable(*key_columns[i])) {
                 ColumnNullable& nullable_col = 
assert_cast<ColumnNullable&>(*key_columns[i]);
-                observed_column = &nullable_col.get_nested_column();
-                null_map = 
assert_cast<ColumnUInt8*>(&nullable_col.get_null_map_column());
-            } else {
-                observed_column = key_columns[i].get();
-                null_map = nullptr;
-            }
 
-            bool is_null = false;
-            if (column_nullable) {
-                /// The current column is nullable. Check if the value of the
-                /// corresponding key is nullable. Update the null map 
accordingly.
+                char* data =
+                        
const_cast<char*>(nullable_col.get_nested_column().get_raw_data().data);
+                UInt8* nullmap = 
assert_cast<ColumnUInt8*>(&nullable_col.get_null_map_column())
+                                         ->get_data()
+                                         .data();
+
+                // The current column is nullable. Check if the value of the
+                // corresponding key is nullable. Update the null map 
accordingly.
                 size_t bucket = i / 8;
                 size_t offset = i % 8;
-                UInt8 val = (reinterpret_cast<const UInt8*>(&key)[bucket] >> 
offset) & 1;
-                null_map->insert_value(val);
-                is_null = val == 1;
-            }
-
-            if (has_nullable_keys && is_null) {
-                observed_column->insert_default();
+                for (size_t j = 0; j < num_rows; j++) {
+                    const Key& key = keys[j];
+                    UInt8 val = (reinterpret_cast<const UInt8*>(&key)[bucket] 
>> offset) & 1;
+                    nullmap[j] = val;
+                    if (!val) {
+                        memcpy(data + j * size, reinterpret_cast<const 
char*>(&key) + pos, size);
+                    }
+                }
             } else {
-                size_t size = key_sizes[i];
-                observed_column->insert_data(reinterpret_cast<const 
char*>(&key) + pos, size);
-                pos += size;
+                char* data = 
const_cast<char*>(key_columns[i]->get_raw_data().data);
+                for (size_t j = 0; j < num_rows; j++) {
+                    const Key& key = keys[j];
+                    memcpy(data + j * size, reinterpret_cast<const 
char*>(&key) + pos, size);
+                }
             }
-        }
-    }
-
-    static void insert_keys_into_columns(std::vector<Key>& keys, 
MutableColumns& key_columns,
-                                         const size_t num_rows, const Sizes& 
key_sizes) {
-        for (size_t i = 0; i != num_rows; ++i) {
-            insert_key_into_columns(keys[i], key_columns, key_sizes);
+            pos += size;
         }
     }
 
@@ -411,17 +375,6 @@ struct AggregationMethodSingleNullableColumn : public 
SingleColumnMethod {
 
     using State = ColumnsHashing::HashMethodSingleLowNullableColumn<BaseState, 
Mapped, true>;
 
-    static void insert_key_into_columns(const Key& key, MutableColumns& 
key_columns,
-                                        const Sizes& /*key_sizes*/) {
-        auto col = key_columns[0].get();
-
-        if constexpr (std::is_same_v<Key, StringRef>) {
-            col->insert_data(key.data, key.size);
-        } else {
-            col->insert_data(reinterpret_cast<const char*>(&key), sizeof(key));
-        }
-    }
-
     static void insert_keys_into_columns(std::vector<Key>& keys, 
MutableColumns& key_columns,
                                          const size_t num_rows, const Sizes&) {
         auto col = key_columns[0].get();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to