HappenLee commented on a change in pull request #8051:
URL: https://github.com/apache/incubator-doris/pull/8051#discussion_r806548118
########## File path: be/src/vec/exec/join/vhash_join_node.cpp ##########
@@ -168,89 +177,136 @@ struct ProcessHashTableProbe {

+            if constexpr (JoinOpType::value == TJoinOp::INNER_JOIN) {
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+                    if (find_result.is_found()) {
+                        auto& mapped = find_result.get_mapped();
+
+                        // TODO: Iterators are currently considered to be a heavy operation and have a certain impact on performance.
+                        // We should rethink whether to use this iterator mode in the future. Now just opt the one row case
+                        if (mapped.get_row_count() == 1) {
+                            mapped.visited = true;
+                            ++repeat_count;
+                            for (size_t j = 0; j < _right_col_len; ++j) {
+                                auto& column = *mapped.block->get_by_position(j).column;
+                                mcol[j + _right_col_idx]->insert_from(column, mapped.row_num);
+                            }
+                        } else {
+                            if (_probe_index + 2 < _probe_rows)
+                                key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, _arena);
+                            for (auto it = mapped.begin(); it.ok(); ++it) {
+                                ++repeat_count;
+                                for (size_t j = 0; j < _right_col_len; ++j) {
+                                    auto& column = *it->block->get_by_position(j).column;
+                                    // TODO: interface insert from cause serious performance problems
+                                    // when column is nullable. Try to make more effective way
+                                    mcol[j + _right_col_idx]->insert_from(column, it->row_num);
+                                }
+                                it->visited = true;

Review comment: the inner join does not need to set visited here.
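For context on this comment: the visited flag on build-side rows is typically only consumed by join types that walk the hash table after the probe pass (right semi/anti joins, as the code comment in the diff also notes), so for a pure inner join the store is wasted work. A minimal standalone sketch of skipping it at compile time, using simplified stand-in types rather than the real Doris ones (JoinOp, BuildRow, on_match are hypothetical), could look like this:

    #include <cstdio>

    // Hypothetical, simplified stand-ins for the real Doris types.
    enum class JoinOp { INNER, LEFT_SEMI, LEFT_ANTI, RIGHT_SEMI, RIGHT_ANTI };

    struct BuildRow {
        int row_num = 0;
        bool visited = false;  // only read back by right semi/anti joins after the probe pass
    };

    // Does this join type ever read `visited` once probing is done?
    template <JoinOp op>
    constexpr bool needs_visited_flag() {
        return op == JoinOp::RIGHT_SEMI || op == JoinOp::RIGHT_ANTI;
    }

    // Probe-side handling of one matched build row: the store to `visited`
    // is compiled away for join types that never look at it again.
    template <JoinOp op>
    void on_match(BuildRow& row) {
        if constexpr (needs_visited_flag<op>()) {
            row.visited = true;
        }
        // ... emit output columns for the matched row here ...
    }

    int main() {
        BuildRow r;
        on_match<JoinOp::INNER>(r);       // leaves r.visited untouched
        on_match<JoinOp::RIGHT_SEMI>(r);  // marks the build row as matched
        std::printf("visited=%d\n", static_cast<int>(r.visited));
        return 0;
    }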
########## File path: be/src/vec/exec/join/vhash_join_node.cpp ##########
@@ -168,89 +177,136 @@ struct ProcessHashTableProbe {

+            } else {
+                auto find_result = (*null_map)[_probe_index]
+                        ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena)) {nullptr, false}
+                        : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+                if (_probe_index + 2 < _probe_rows)
+                    key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, _arena);
+
+                if (find_result.is_found()) {
                     auto& mapped = find_result.get_mapped();
                     if (mapped.get_row_count() == 1) {
                         mapped.visited = true;
                         // right semi/anti join should dispose the data in hash table
                         // after probe data eof
-                        if (!_join_node->_is_right_semi_anti) {
+                        if constexpr (JoinOpType::value != TJoinOp::RIGHT_ANTI_JOIN && JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN) {

Review comment: this should be JoinOpType::value != TJoinOp::RIGHT_SEMI_JOIN.
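The condition as written, != RIGHT_ANTI_JOIN && == RIGHT_SEMI_JOIN, collapses to == RIGHT_SEMI_JOIN, which inverts the old !_is_right_semi_anti guard; the reviewer's suggestion makes both comparisons !=. A small self-contained sketch that encodes the intended predicate and checks it at compile time (the TJoinOp enum and JoinOpType wrapper here are simplified stand-ins for the generated Thrift types, and copy_build_columns is a hypothetical helper):

    #include <type_traits>

    // Hypothetical stand-in for the Thrift-generated enum used in the real code.
    enum class TJoinOp { INNER_JOIN, RIGHT_SEMI_JOIN, RIGHT_ANTI_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };

    template <TJoinOp op>
    struct JoinOpType : std::integral_constant<TJoinOp, op> {};

    // The old runtime guard was `!_is_right_semi_anti`: copy build-side columns
    // only when the join is NOT a right semi/anti join, so the compile-time
    // predicate needs `!=` on both comparisons.
    template <class Op>
    constexpr bool copy_build_columns() {
        return Op::value != TJoinOp::RIGHT_ANTI_JOIN && Op::value != TJoinOp::RIGHT_SEMI_JOIN;
    }

    static_assert(copy_build_columns<JoinOpType<TJoinOp::INNER_JOIN>>());
    static_assert(!copy_build_columns<JoinOpType<TJoinOp::RIGHT_SEMI_JOIN>>());
    static_assert(!copy_build_columns<JoinOpType<TJoinOp::RIGHT_ANTI_JOIN>>());

    int main() { return 0; }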
########## File path: be/src/vec/exec/join/vhash_join_node.cpp ##########
@@ -168,89 +177,136 @@ struct ProcessHashTableProbe {

+            } else if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN) {
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+                    if (find_result.is_found()) {
+                        ++repeat_count;
+                    }
+                }
+                items_counts[_probe_index++] = repeat_count;

Review comment: this is the same code as line 330; why does it need to be handled here as well?
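The duplication the reviewer points at is the per-row bookkeeping (items_counts, current_offset, the batch-size break) appearing inside the LEFT_SEMI_JOIN branch even though the same lines run at the end of the loop. One way to keep it in a single place is to let the join-type-specific branch only report a repeat count and do the accounting once after the dispatch; the following is a stripped-down, hypothetical skeleton (ProbeSketch and match_one_row are not Doris types), not the PR's actual structure:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical skeleton of the probe loop; the real function in
    // vhash_join_node.cpp works on Doris blocks and a hash table, omitted here.
    struct ProbeSketch {
        std::vector<uint32_t> items_counts;
        std::size_t probe_index = 0;
        std::size_t probe_rows = 0;
        int batch_size = 4096;

        // match_one_row(i) is the join-type-specific part: it looks up probe
        // row i and returns how many output rows it produced.
        template <class PerRowMatcher>
        void run(PerRowMatcher&& match_one_row) {
            int current_offset = 0;
            while (probe_index < probe_rows) {
                int repeat_count = match_one_row(probe_index);

                // Bookkeeping written exactly once, after the per-join-type
                // dispatch, instead of being repeated inside each branch.
                items_counts[probe_index++] = repeat_count;
                current_offset += repeat_count;
                if (current_offset >= batch_size) {
                    break;
                }
            }
        }
    };

    int main() {
        ProbeSketch p;
        p.probe_rows = 8;
        p.items_counts.assign(p.probe_rows, 0);
        // Toy matcher: pretend every probe row matches exactly once.
        p.run([](std::size_t) { return 1; });
        return 0;
    }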
########## File path: be/src/vec/exec/join/vhash_join_node.cpp ##########
@@ -168,89 +177,136 @@ struct ProcessHashTableProbe {

                     } else {
                         for (auto it = mapped.begin(); it.ok(); ++it) {
                             // right semi/anti join should dispose the data in hash table
                             // after probe data eof
-                            if (!_join_node->_is_right_semi_anti) {
+                            if constexpr (JoinOpType::value != TJoinOp::RIGHT_ANTI_JOIN && JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN) {

Review comment: same here, this should be JoinOpType::value != TJoinOp::RIGHT_SEMI_JOIN.
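Since the same predicate is mistyped in two places (the one-row path above and this iterator path), one defensive option is to give it a name once and reuse it, so both call sites stay in sync. A hypothetical refactor sketch, with a simplified TJoinOp enum standing in for the real Thrift enum:

    // Hypothetical refactor sketch, not code from the PR: naming the predicate
    // once keeps the two call sites from drifting apart.
    enum class TJoinOp { INNER_JOIN, RIGHT_SEMI_JOIN, RIGHT_ANTI_JOIN };

    template <TJoinOp op>
    inline constexpr bool is_right_semi_anti =
            op == TJoinOp::RIGHT_SEMI_JOIN || op == TJoinOp::RIGHT_ANTI_JOIN;

    // Both the single-row and the iterator paths would test the same named
    // constant instead of repeating the raw comparison chain.
    template <TJoinOp op>
    void emit_matched_row() {
        if constexpr (!is_right_semi_anti<op>) {
            // copy the build-side columns into the output block here
        }
    }

    static_assert(is_right_semi_anti<TJoinOp::RIGHT_SEMI_JOIN>);
    static_assert(!is_right_semi_anti<TJoinOp::INNER_JOIN>);

    int main() {
        emit_matched_row<TJoinOp::INNER_JOIN>();
        return 0;
    }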
########## File path: be/src/vec/exec/join/vhash_join_node.cpp ##########
@@ -168,89 +177,136 @@ struct ProcessHashTableProbe {

+            if constexpr (JoinOpType::value == TJoinOp::INNER_JOIN) {
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+                    if (find_result.is_found()) {
+                        auto& mapped = find_result.get_mapped();
+
+                        // TODO: Iterators are currently considered to be a heavy operation and have a certain impact on performance.
+                        // We should rethink whether to use this iterator mode in the future. Now just opt the one row case
+                        if (mapped.get_row_count() == 1) {
+                            mapped.visited = true;

Review comment: no need to set visited here.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org