HappenLee commented on a change in pull request #8051:
URL: https://github.com/apache/incubator-doris/pull/8051#discussion_r807512288



##########
File path: be/src/vec/exec/join/vhash_join_node.cpp
##########
@@ -481,17 +530,17 @@ struct ProcessHashTableProbe {
                 if ((it->visited && _join_node->_join_op == 
TJoinOp::RIGHT_SEMI_JOIN) ||
                     (!it->visited && _join_node->_join_op != 
TJoinOp::RIGHT_SEMI_JOIN)) {
                     block_size++;
-                    for (size_t j = 0; j < right_col_len; ++j) {
+                    for (size_t j = 0; j < _right_col_len; ++j) {
                         auto& column = *it->block->get_by_position(j).column;
-                        mcol[j + right_col_idx]->insert_from(column, 
it->row_num);
+                        mcol[j + _right_col_idx]->insert_from(column, 
it->row_num);
                     }
                 }
             }
         }
 
         // right outer join / full join need insert data of left table
         if (_join_node->_is_outer_join) {
-            for (int i = 0; i < right_col_idx; ++i) {
+            for (int i = 0; i < _right_col_idx; ++i) {
                 for (int j = 0; j < block_size; ++j) {
                     mcol[i]->insert_data(nullptr, 0);

Review comment:
       cast to column nullable to avoid one time virtual function call

##########
File path: be/src/vec/exec/join/vhash_join_node.cpp
##########
@@ -177,80 +187,118 @@ struct ProcessHashTableProbe {
 
         std::vector<uint32_t> items_counts(_probe_rows);
         auto& mcol = mutable_block.mutable_columns();
-
-        int right_col_idx = _join_node->_is_right_semi_anti ? 0 : 
_left_table_data_types.size();
-        int right_col_len = _right_table_data_types.size();
         int current_offset = 0;
 
         for (; _probe_index < _probe_rows;) {
-            // ignore null rows
             if constexpr (ignore_null) {
                 if ((*null_map)[_probe_index]) {
                     items_counts[_probe_index++] = 0;
                     continue;
                 }
             }
-
             int repeat_count = 0;
-            auto find_result =
-                    (*null_map)[_probe_index]
+            if constexpr (JoinOpType::value == TJoinOp::INNER_JOIN) {
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = 
key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+                    if (find_result.is_found()) {
+                        auto& mapped = find_result.get_mapped();
+
+                        // TODO: Iterators are currently considered to be a 
heavy operation and have a certain impact on performance.
+                        // We should rethink whether to use this iterator mode 
in the future. Now just opt the one row case
+                        if (mapped.get_row_count() == 1) {
+                            ++repeat_count;
+                            for (size_t j = 0; j < _right_col_len; ++j) {
+                                auto& column = 
*mapped.block->get_by_position(j).column;
+                                mcol[j + _right_col_idx]->insert_from(column, 
mapped.row_num);
+                            }
+                        } else {
+                            // prefetch is more useful while matching to 
multiple rows
+                            if (_probe_index + 2 < _probe_rows)
+                                key_getter.prefetch(hash_table_ctx.hash_table, 
_probe_index + 2, _arena);
+                            for (auto it = mapped.begin(); it.ok(); ++it) {
+                                ++repeat_count;
+                                for (size_t j = 0; j < _right_col_len; ++j) {
+                                    auto& column = 
*it->block->get_by_position(j).column;
+                                    // TODO: interface insert from cause 
serious performance problems
+                                    //  when column is nullable. Try to make 
more effective way
+                                    mcol[j + 
_right_col_idx]->insert_from(column, it->row_num);
+                                }
+                            }
+                        }
+                    }
+                }
+            } else if constexpr (JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) 
{
+                if ((*null_map)[_probe_index]) { 
+                    ++repeat_count;
+                } else {
+                    auto find_result = 
key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+                    if (find_result.is_found()) { 
+                        //do nothing
+                    } else {
+                        ++repeat_count;
+                    }
+                }
+            } else if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN) 
{
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = 
key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+                    if (find_result.is_found()) { 
+                        ++repeat_count;
+                    }
+                }
+            } else {
+                auto find_result = (*null_map)[_probe_index]
                             ? 
decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
                                                            _arena)) {nullptr, 
false}
                             : key_getter.find_key(hash_table_ctx.hash_table, 
_probe_index, _arena);
 
-            if (_probe_index + 2 < _probe_rows)
-                key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 
2, _arena);
+                if (_probe_index + 2 < _probe_rows)
+                    key_getter.prefetch(hash_table_ctx.hash_table, 
_probe_index + 2, _arena);
+
+                if (find_result.is_found()) {
 
-            if (find_result.is_found()) {
-                // left semi join only need one match, do not need insert the 
data of right table
-                if (_join_node->_join_op == TJoinOp::LEFT_SEMI_JOIN) {
-                    ++repeat_count;
-                } else if (_join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
-                    // do nothing
-                } else {
                     auto& mapped = find_result.get_mapped();
                     // TODO: Iterators are currently considered to be a heavy 
operation and have a certain impact on performance.
                     // We should rethink whether to use this iterator mode in 
the future. Now just opt the one row case
                     if (mapped.get_row_count() == 1) {
                         mapped.visited = true;
                         // right semi/anti join should dispose the data in 
hash table
                         // after probe data eof
-                        if (!_join_node->_is_right_semi_anti) {
+                        if constexpr (JoinOpType::value != 
TJoinOp::RIGHT_ANTI_JOIN && 
+                                      JoinOpType::value != 
TJoinOp::RIGHT_SEMI_JOIN) {
                             ++repeat_count;
-                            for (size_t j = 0; j < right_col_len; ++j) {
+                            for (size_t j = 0; j < _right_col_len; ++j) {
                                 auto& column = 
*mapped.block->get_by_position(j).column;
-                                mcol[j + right_col_idx]->insert_from(column, 
mapped.row_num);
+                                mcol[j + _right_col_idx]->insert_from(column, 
mapped.row_num);
                             }
                         }
                     } else {
                         for (auto it = mapped.begin(); it.ok(); ++it) {
                             // right semi/anti join should dispose the data in 
hash table
                             // after probe data eof
-                            if (!_join_node->_is_right_semi_anti) {
+                            if constexpr (JoinOpType::value != 
TJoinOp::RIGHT_ANTI_JOIN && 
+                                          JoinOpType::value != 
TJoinOp::RIGHT_SEMI_JOIN) {
                                 ++repeat_count;
-                                for (size_t j = 0; j < right_col_len; ++j) {
+                                for (size_t j = 0; j < _right_col_len; ++j) {
                                     auto& column = 
*it->block->get_by_position(j).column;
                                     // TODO: interface insert from cause 
serious performance problems
                                     //  when column is nullable. Try to make 
more effective way
-                                    mcol[j + 
right_col_idx]->insert_from(column, it->row_num);
+                                    mcol[j + 
_right_col_idx]->insert_from(column, it->row_num);
                                 }
                             }
                             it->visited = true;
                         }
                     }
-                }
-            } else if (_join_node->_match_all_probe ||
-                       _join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
-                ++repeat_count;
-                // only full outer / left outer need insert the data of right 
table
-                if (_join_node->_match_all_probe) {
-                    for (size_t j = 0; j < right_col_len; ++j) {
-                        DCHECK(mcol[j + right_col_idx]->is_nullable());
-                        mcol[j + right_col_idx]->insert_data(nullptr, 0);
+                } else if constexpr (JoinOpType::value == 
TJoinOp::LEFT_OUTER_JOIN || 
+                                     JoinOpType::value == 
TJoinOp::FULL_OUTER_JOIN) {
+                    // only full outer / left outer need insert the data of 
right table
+                    ++repeat_count;
+                    for (size_t j = 0; j < _right_col_len; ++j) {
+                        DCHECK(mcol[j + _right_col_idx]->is_nullable());
+                        mcol[j + _right_col_idx]->insert_data(nullptr, 0);

Review comment:
       cast to column nullable to avoid one time virtual function call

##########
File path: be/src/vec/exec/join/vhash_join_node.cpp
##########
@@ -316,30 +362,32 @@ struct ProcessHashTableProbe {
 
                 for (auto it = mapped.begin(); it.ok(); ++it) {
                     ++current_offset;
-                    for (size_t j = 0; j < right_col_len; ++j) {
+                    for (size_t j = 0; j < _right_col_len; ++j) {
                         auto& column = *it->block->get_by_position(j).column;
-                        mcol[j + right_col_idx]->insert_from(column, 
it->row_num);
+                        mcol[j + _right_col_idx]->insert_from(column, 
it->row_num);
                     }
                     visited_map.emplace_back(&it->visited);
                 }
                 same_to_prev.emplace_back(false);
                 for (int i = 0; i < current_offset - origin_offset - 1; ++i) {
                     same_to_prev.emplace_back(true);
                 }
-            } else if (_join_node->_match_all_probe ||
-                       _join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
+            } else if constexpr (JoinOpType::value == TJoinOp::LEFT_OUTER_JOIN 
|| 
+                                 JoinOpType::value == TJoinOp::FULL_OUTER_JOIN 
||
+                                 JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) 
{
                 ++current_offset;
                 same_to_prev.emplace_back(false);
                 visited_map.emplace_back(nullptr);
                 // only full outer / left outer need insert the data of right 
table
-                if (_join_node->_match_all_probe) {
-                    for (size_t j = 0; j < right_col_len; ++j) {
-                        DCHECK(mcol[j + right_col_idx]->is_nullable());
-                        mcol[j + right_col_idx]->insert_data(nullptr, 0);
+                if constexpr (JoinOpType::value == TJoinOp::LEFT_OUTER_JOIN || 
+                              JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) {
+                    for (size_t j = 0; j < _right_col_len; ++j) {
+                        DCHECK(mcol[j + _right_col_idx]->is_nullable());
+                        mcol[j + _right_col_idx]->insert_data(nullptr, 0);

Review comment:
       cast to column nullable to avoid one time virtual function call




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to