This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 80dd027ce2e28a6fb5c6c1b9eaaf7c7ff5f2a068
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Mon May 13 23:46:27 2024 +0800

    [opt](join) For left semi/anti join without mark join conjunct and without 
other conjuncts, stop probing after matching one row (#34703)
---
 be/src/vec/common/hash_table/join_hash_table.h     | 31 +++++++++++++++-------
 .../vec/exec/join/process_hash_table_probe_impl.h  |  3 ++-
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/common/hash_table/join_hash_table.h 
b/be/src/vec/common/hash_table/join_hash_table.h
index 10ca6c9b2dd..a869ad419ad 100644
--- a/be/src/vec/common/hash_table/join_hash_table.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -89,7 +89,7 @@ public:
     auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
build_idx_map,
                     int probe_idx, uint32_t build_idx, int probe_rows,
                     uint32_t* __restrict probe_idxs, bool& probe_visited,
-                    uint32_t* __restrict build_idxs) {
+                    uint32_t* __restrict build_idxs, bool 
has_mark_join_conjunct = false) {
         if constexpr (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
                       JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) {
             if (_empty_build_side) {
@@ -100,12 +100,25 @@ public:
 
         if constexpr (with_other_conjuncts ||
                       (is_mark_join && JoinOpType != 
TJoinOp::RIGHT_SEMI_JOIN)) {
-            constexpr bool null_aware_without_other_conjuncts =
-                    (JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
-                     JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) &&
-                    !with_other_conjuncts;
-            return _find_batch_conjunct<JoinOpType, need_judge_null,
-                                        null_aware_without_other_conjuncts>(
+            if constexpr (!with_other_conjuncts) {
+                constexpr bool is_null_aware_join =
+                        JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
+                        JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
+                constexpr bool is_left_half_join = JoinOpType == 
TJoinOp::LEFT_SEMI_JOIN ||
+                                                   JoinOpType == 
TJoinOp::LEFT_ANTI_JOIN;
+
+                /// For null aware join or left half(semi/anti) join without 
other conjuncts and without
+                /// mark join conjunct.
+                /// If one row on probe side has one match in build side, we 
should stop searching the
+                /// hash table for this row.
+                if (is_null_aware_join || (is_left_half_join && 
!has_mark_join_conjunct)) {
+                    return _find_batch_conjunct<JoinOpType, need_judge_null, 
true>(
+                            keys, build_idx_map, probe_idx, build_idx, 
probe_rows, probe_idxs,
+                            build_idxs);
+                }
+            }
+
+            return _find_batch_conjunct<JoinOpType, need_judge_null, false>(
                     keys, build_idx_map, probe_idx, build_idx, probe_rows, 
probe_idxs, build_idxs);
         }
 
@@ -314,7 +327,7 @@ private:
         return std::tuple {probe_idx, 0U, matched_cnt};
     }
 
-    template <int JoinOpType, bool need_judge_null, bool 
null_aware_without_other_conjuncts>
+    template <int JoinOpType, bool need_judge_null, bool 
only_need_to_match_one>
     auto _find_batch_conjunct(const Key* __restrict keys, const uint32_t* 
__restrict build_idx_map,
                               int probe_idx, uint32_t build_idx, int 
probe_rows,
                               uint32_t* __restrict probe_idxs, uint32_t* 
__restrict build_idxs) {
@@ -345,7 +358,7 @@ private:
                     probe_idxs[matched_cnt] = probe_idx;
                     matched_cnt++;
 
-                    if constexpr (null_aware_without_other_conjuncts) {
+                    if constexpr (only_need_to_match_one) {
                         build_idx = 0;
                         break;
                     }
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h 
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index b4212405aed..9a15df637aa 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -213,6 +213,7 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_process(HashTableType& hash
     }
 
     auto& mcol = mutable_block.mutable_columns();
+    const bool has_mark_join_conjunct = !_parent->_mark_join_conjuncts.empty();
 
     int current_offset = 0;
     if constexpr ((JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
@@ -258,7 +259,7 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_process(HashTableType& hash
               need_null_map_for_probe &&
                       ignore_null > (hash_table_ctx.keys, 
hash_table_ctx.bucket_nums.data(),
                                      probe_index, build_index, probe_rows, 
_probe_indexs.data(),
-                                     _probe_visited, _build_indexs.data());
+                                     _probe_visited, _build_indexs.data(), 
has_mark_join_conjunct);
         probe_index = new_probe_idx;
         build_index = new_build_idx;
         current_offset = new_current_offset;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to