zuochunwei commented on a change in pull request #7972:
URL: https://github.com/apache/incubator-doris/pull/7972#discussion_r805656136



##########
File path: be/src/vec/exec/join/vhash_join_node.cpp
##########
@@ -177,89 +180,113 @@ struct ProcessHashTableProbe {
 
         std::vector<uint32_t> items_counts(_probe_rows);
         auto& mcol = mutable_block.mutable_columns();
-
-        int right_col_idx = _join_node->_is_right_semi_anti ? 0 : 
_left_table_data_types.size();
-        int right_col_len = _right_table_data_types.size();
         int current_offset = 0;
+        std::vector<uint32_t> _build_index;
+        _build_index.reserve(1.2 * _batch_size);
 
         for (; _probe_index < _probe_rows;) {
-            // ignore null rows
             if constexpr (ignore_null) {
                 if ((*null_map)[_probe_index]) {
                     items_counts[_probe_index++] = 0;
                     continue;
                 }
             }
-
             int repeat_count = 0;
-            auto find_result =
-                    (*null_map)[_probe_index]
+            if constexpr (JoinOpType::value == TJoinOp::INNER_JOIN) {
+                if (!(*null_map)[_probe_index]) {
+                    auto find_result = 
key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+                    if (find_result.is_found()) {
+                        auto& mapped = find_result.get_mapped();
+
+                        if (mapped.get_row_count() == 1) {
+                            ++repeat_count;
+                            _build_index.emplace_back(mapped.row_num);
+                        } else {
+                            if (_probe_index + 2 < _probe_rows)
+                                key_getter.prefetch(hash_table_ctx.hash_table, 
_probe_index + 2, _arena);
+                            for (auto it = mapped.begin(); it.ok(); ++it) {
+                                ++repeat_count;

Review comment:
       Increment repeat_count outside the loop rather than on every iteration.

##########
File path: be/src/vec/exec/join/vhash_join_node.cpp
##########
@@ -217,49 +249,44 @@ struct ProcessHashTableProbe {
                         // after probe data eof
                         if (!_join_node->_is_right_semi_anti) {
                             ++repeat_count;
-                            for (size_t j = 0; j < right_col_len; ++j) {
-                                auto& column = 
*mapped.block->get_by_position(j).column;
-                                mcol[j + right_col_idx]->insert_from(column, 
mapped.row_num);
-                            }
+                            _build_index.emplace_back(mapped.row_num);
                         }
                     } else {
                         for (auto it = mapped.begin(); it.ok(); ++it) {
                             // right semi/anti join should dispose the data in 
hash table
                             // after probe data eof
                             if (!_join_node->_is_right_semi_anti) {
                                 ++repeat_count;
-                                for (size_t j = 0; j < right_col_len; ++j) {
-                                    auto& column = 
*it->block->get_by_position(j).column;
-                                    // TODO: interface insert from cause 
serious performance problems
-                                    //  when column is nullable. Try to make 
more effective way
-                                    mcol[j + 
right_col_idx]->insert_from(column, it->row_num);
-                                }
+                                _build_index.emplace_back(it->row_num);
                             }
                             it->visited = true;
                         }
                     }
-                }
-            } else if (_join_node->_match_all_probe ||
-                       _join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
-                ++repeat_count;
-                // only full outer / left outer need insert the data of right 
table
-                if (_join_node->_match_all_probe) {
-                    for (size_t j = 0; j < right_col_len; ++j) {
-                        DCHECK(mcol[j + right_col_idx]->is_nullable());
-                        mcol[j + right_col_idx]->insert_data(nullptr, 0);
+                } else if (_join_node->_match_all_probe) {
+                    // only full outer / left outer need insert the data of 
right table
+                    ++repeat_count;
+                    for (size_t j = 0; j < _right_col_len; ++j) {
+                        DCHECK(mcol[j + _right_col_idx]->is_nullable());
+                        mcol[j + _right_col_idx]->insert_data(nullptr, 0);
                     }
                 }
             }
-
             items_counts[_probe_index++] = repeat_count;
             current_offset += repeat_count;
-
             if (current_offset >= _batch_size) {
                 break;
             }
         }
-        
-        for (int i = 0; i < right_col_idx; ++i) {
+
+        // insert all match build rows
+        for (int i = 0; i < _right_col_len; i++) {
+            auto &column = *_build_block.get_by_position(i).column;
+            for (int j = 0; j < _build_index.size(); j++) {

Review comment:
       Avoid calling the virtual function insert_from inside the loop.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to