This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f17ac173b4e8052cb130119bdec649169f66ac4e
Author: Pxl <pxl...@qq.com>
AuthorDate: Thu Apr 18 14:26:39 2024 +0800

    [Improvement](join) empty_block shall be set to true when the build block has only one row (#33721)
    
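    empty_block shall be set to true when the build block has only one row,
    because the build side always mocks one row into the block (so a block
    with at most one row carries no real build data).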
---
 be/src/pipeline/exec/hashjoin_build_sink.cpp     | 17 +++++++++--------
 be/src/pipeline/exec/hashjoin_probe_operator.cpp | 17 ++++++-----------
 be/src/vec/core/column_with_type_and_name.cpp    | 12 +++++++-----
 3 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp 
b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index f0ff99f0e3d..a780131ad44 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -155,21 +155,22 @@ bool HashJoinBuildSinkLocalState::build_unique() const {
 
 void HashJoinBuildSinkLocalState::init_short_circuit_for_probe() {
     auto& p = _parent->cast<HashJoinBuildSinkOperatorX>();
+    bool empty_block =
+            !_shared_state->build_block ||
+            !(_shared_state->build_block->rows() > 1); // build size always 
mock a row into block
     _shared_state->short_circuit_for_probe =
             (_shared_state->_has_null_in_build_side &&
              p._join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && 
!p._is_mark_join) ||
-            (!_shared_state->build_block && p._join_op == TJoinOp::INNER_JOIN 
&&
-             !p._is_mark_join) ||
-            (!_shared_state->build_block && p._join_op == 
TJoinOp::LEFT_SEMI_JOIN &&
-             !p._is_mark_join) ||
-            (!_shared_state->build_block && p._join_op == 
TJoinOp::RIGHT_OUTER_JOIN) ||
-            (!_shared_state->build_block && p._join_op == 
TJoinOp::RIGHT_SEMI_JOIN) ||
-            (!_shared_state->build_block && p._join_op == 
TJoinOp::RIGHT_ANTI_JOIN);
+            (empty_block && p._join_op == TJoinOp::INNER_JOIN && 
!p._is_mark_join) ||
+            (empty_block && p._join_op == TJoinOp::LEFT_SEMI_JOIN && 
!p._is_mark_join) ||
+            (empty_block && p._join_op == TJoinOp::RIGHT_OUTER_JOIN) ||
+            (empty_block && p._join_op == TJoinOp::RIGHT_SEMI_JOIN) ||
+            (empty_block && p._join_op == TJoinOp::RIGHT_ANTI_JOIN);
 
     //when build table rows is 0 and not have other_join_conjunct and not 
_is_mark_join and join type is one of 
LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN
     //we could get the result is probe table + null-column(if need output)
     _shared_state->empty_right_table_need_probe_dispose =
-            (!_shared_state->build_block && !p._have_other_join_conjunct && 
!p._is_mark_join) &&
+            (empty_block && !p._have_other_join_conjunct && !p._is_mark_join) 
&&
             (p._join_op == TJoinOp::LEFT_OUTER_JOIN || p._join_op == 
TJoinOp::FULL_OUTER_JOIN ||
              p._join_op == TJoinOp::LEFT_ANTI_JOIN);
 }
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp 
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index a58ad62211c..2e273f18660 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -247,7 +247,9 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
     }
 
     //TODO: this short circuit maybe could refactor, no need to check at here.
-    if (local_state._shared_state->empty_right_table_need_probe_dispose) {
+    // only support nereids
+    if (local_state._shared_state->empty_right_table_need_probe_dispose &&
+        !Base::_projections.empty()) {
         // when build table rows is 0 and not have other_join_conjunct and 
join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN
         // we could get the result is probe table + null-column(if need output)
         // If we use a short-circuit strategy, should return block directly by 
add additional null data.
@@ -257,12 +259,6 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
             return Status::OK();
         }
 
-        vectorized::Block temp_block;
-        //get probe side output column
-        for (int i = 0; i < _left_output_slot_flags.size(); ++i) {
-            temp_block.insert(local_state._probe_block.get_by_position(i));
-        }
-
         //create build side null column, if need output
         for (int i = 0;
              (_join_op != TJoinOp::LEFT_ANTI_JOIN) && i < 
_right_output_slot_flags.size(); ++i) {
@@ -273,8 +269,8 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
                     
vectorized::ColumnVector<vectorized::UInt8>::create(block_rows, 1);
             auto nullable_column = 
vectorized::ColumnNullable::create(std::move(column),
                                                                       
std::move(null_map_column));
-            temp_block.insert({std::move(nullable_column), make_nullable(type),
-                               _right_table_column_names[i]});
+            local_state._probe_block.insert({std::move(nullable_column), 
make_nullable(type),
+                                             _right_table_column_names[i]});
         }
         if (_is_outer_join) {
             reinterpret_cast<vectorized::ColumnUInt8*>(
@@ -290,8 +286,7 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
         /// No need to check the block size in `_filter_data_and_build_output` 
because here dose not
         /// increase the output rows count(just same as `_probe_block`'s rows 
count).
         RETURN_IF_ERROR(local_state.filter_data_and_build_output(state, 
output_block, eos,
-                                                                 &temp_block, 
false));
-        temp_block.clear();
+                                                                 
&local_state._probe_block, false));
         
local_state._probe_block.clear_column_data(_child_x->row_desc().num_materialized_slots());
         return Status::OK();
     }
diff --git a/be/src/vec/core/column_with_type_and_name.cpp 
b/be/src/vec/core/column_with_type_and_name.cpp
index cd0f7194004..e93946804ff 100644
--- a/be/src/vec/core/column_with_type_and_name.cpp
+++ b/be/src/vec/core/column_with_type_and_name.cpp
@@ -62,15 +62,17 @@ void ColumnWithTypeAndName::dump_structure(std::ostream& 
out) const {
         out << name;
     }
 
-    if (type)
+    if (type) {
         out << " " << type->get_name();
-    else
+    } else {
         out << " nullptr";
+    }
 
-    if (column)
-        out << ' ' << column->dump_structure();
-    else
+    if (column) {
+        out << ' ' << column->dump_structure() << "(use_count=" << 
column->use_count() << ')';
+    } else {
         out << " nullptr";
+    }
 }
 
 String ColumnWithTypeAndName::dump_structure() const {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to