This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch new_join2
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a7092a7755d20058c2e6bb8ffac588c3fa1d3eb7
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Nov 21 12:04:02 2023 +0800

    fix outer join and other join conjuncts (#27319)
    
    * update some fix on join
    
    * save code
    
    ---------
    
    Co-authored-by: BiteTheDDDDt <pxl...@qq.com>
---
 be/src/vec/common/hash_table/hash_map.h            | 26 +++++++++++++---------
 be/src/vec/exec/join/process_hash_table_probe.h    |  1 +
 .../vec/exec/join/process_hash_table_probe_impl.h  |  2 +-
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 2f81fc27978..80ff3481544 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -260,7 +260,7 @@ public:
     template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join, 
bool need_judge_null>
     auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
                     int probe_idx, uint32_t build_idx, int probe_rows,
-                    uint32_t* __restrict probe_idxs, uint32_t* __restrict 
build_idxs,
+                    uint32_t* __restrict probe_idxs, bool& probe_visited, 
uint32_t* __restrict build_idxs,
                     doris::vectorized::ColumnFilterHelper* mark_column) {
         if constexpr (is_mark_join) {
             return _find_batch_mark<JoinOpType>(keys, bucket_nums, probe_idx, 
probe_rows,
@@ -277,7 +277,7 @@ public:
                       JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
                       JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
             return _find_batch_inner_outer_join<JoinOpType>(keys, bucket_nums, 
probe_idx, build_idx,
-                                                            probe_rows, 
probe_idxs, build_idxs);
+                                                            probe_rows, 
probe_idxs, probe_visited, build_idxs);
         }
         if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
                       JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
@@ -431,7 +431,7 @@ private:
                         build_idxs[matched_cnt++] = build_idx;
                     }
                 } else {
-                    build_idxs[matched_cnt++] = build_idx;
+                    build_idxs[matched_cnt] = build_idx;
                     matched_cnt += keys[probe_idx] == build_keys[build_idx];
                 }
                 build_idx = next[build_idx];
@@ -443,6 +443,7 @@ private:
 
             if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
                           JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
+                // may go over batch_size when emplacing 0 into build_idxs
                 if (!build_idx) {
                     probe_idxs[matched_cnt] = probe_idx;
                     build_idxs[matched_cnt] = 0;
@@ -462,9 +463,7 @@ private:
             do_the_probe();
         }
 
-        probe_idx -=
-                (matched_cnt >= batch_size &&
-                 build_idx); // FULL_OUTER_JOIN may over batch_size when 
emplace 0 into build_idxs
+        probe_idx -= (build_idx != 0);
         return std::tuple {probe_idx, build_idx, matched_cnt};
     }
 
@@ -473,6 +472,7 @@ private:
                                       const uint32_t* __restrict bucket_nums, 
int probe_idx,
                                       uint32_t build_idx, int probe_rows,
                                       uint32_t* __restrict probe_idxs,
+                                      bool& probe_visited,
                                       uint32_t* __restrict build_idxs) {
         auto matched_cnt = 0;
         const auto batch_size = max_batch_size;
@@ -496,10 +496,14 @@ private:
             if constexpr (JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
                           JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN) {
                 // `(!matched_cnt || probe_idxs[matched_cnt - 1] != 
probe_idx)` means not match one build side
-                if (!matched_cnt || probe_idxs[matched_cnt - 1] != probe_idx) {
-                    probe_idxs[matched_cnt] = probe_idx;
-                    build_idxs[matched_cnt] = 0;
-                    matched_cnt++;
+                probe_visited |= (matched_cnt && probe_idxs[matched_cnt - 1] 
== probe_idx);
+                if (!build_idx) {
+                    if (!probe_visited) {
+                        probe_idxs[matched_cnt] = probe_idx;
+                        build_idxs[matched_cnt] = 0;
+                        matched_cnt++;
+                    }
+                    probe_visited = false;
                 }
             }
             probe_idx++;
@@ -514,7 +518,7 @@ private:
             do_the_probe();
         }
 
-        probe_idx -= (matched_cnt == batch_size && build_idx);
+        probe_idx -= (build_idx != 0);
         return std::tuple {probe_idx, build_idx, matched_cnt};
     }
 
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h
index ed7d0c6443b..bfc628914b1 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -88,6 +88,7 @@ struct ProcessHashTableProbe {
     std::vector<StringRef> _probe_keys;
 
     std::vector<uint32_t> _probe_indexs;
+    bool _probe_visited = false;
     std::vector<uint32_t> _build_indexs;
     std::vector<int> _build_blocks_locs;
     // only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 8cb5bd8cb8f..4cd79510d4b 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -179,7 +179,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
               with_other_conjuncts, is_mark_join,
               need_null_map_for_probe &&
                       ignore_null > (hash_table_ctx.keys, 
hash_table_ctx.bucket_nums.data(),
-                                     probe_index, build_index, probe_rows, 
_probe_indexs.data(),
+                                     probe_index, build_index, probe_rows, 
_probe_indexs.data(), _probe_visited,
                                      _build_indexs.data(), mark_column.get());
         probe_index = new_probe_idx;
         build_index = new_build_idx;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to