This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 51b39d09921 [fix](join)Consider mark join when computing 
right_col_idx(#50720) (#50727)
51b39d09921 is described below

commit 51b39d099219a3354c4571b7dce9b23125572820
Author: Jerry Hu <[email protected]>
AuthorDate: Mon May 19 14:42:15 2025 +0800

    [fix](join)Consider mark join when computing right_col_idx(#50720) (#50727)
---
 be/src/pipeline/exec/hashjoin_probe_operator.cpp   |  31 ++-
 be/src/pipeline/exec/hashjoin_probe_operator.h     |   6 +
 be/src/vec/common/hash_table/join_hash_table.h     |  44 ++---
 be/src/vec/exec/join/process_hash_table_probe.h    |  10 +-
 .../vec/exec/join/process_hash_table_probe_impl.h  | 138 +++++++++----
 be/src/vec/exec/join/vhash_join_node.cpp           |   5 +
 be/src/vec/exec/join/vhash_join_node.h             |   3 +
 .../data/query_p0/join/mark_join/mark_join.out     | Bin 239 -> 1007 bytes
 .../query_p0/join/mark_join/mark_join.groovy       | 214 +++++++++++++++++++++
 9 files changed, 382 insertions(+), 69 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp 
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index a98c7215771..85280c8295c 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -78,6 +78,7 @@ Status HashJoinProbeLocalState::open(RuntimeState* state) {
 
     _process_hashtable_ctx_variants = std::make_unique<HashTableCtxVariants>();
     auto& p = _parent->cast<HashJoinProbeOperatorX>();
+    _right_col_idx = p._right_col_idx;
     std::visit(
             [&](auto&& join_op_variants, auto have_other_join_conjunct) {
                 using JoinOpType = std::decay_t<decltype(join_op_variants)>;
@@ -536,7 +537,7 @@ Status HashJoinProbeOperatorX::init(const TPlanNode& tnode, 
RuntimeState* state)
     const bool probe_dispose_null =
             _match_all_probe || _build_unique || _join_op == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
             _join_op == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN || _join_op == 
TJoinOp::LEFT_ANTI_JOIN ||
-            _join_op == TJoinOp::LEFT_SEMI_JOIN;
+            _join_op == TJoinOp::LEFT_SEMI_JOIN || _is_mark_join;
     const std::vector<TEqJoinCondition>& eq_join_conjuncts = 
tnode.hash_join_node.eq_join_conjuncts;
     std::vector<bool> probe_not_ignore_null(eq_join_conjuncts.size());
     size_t conjuncts_index = 0;
@@ -668,14 +669,34 @@ Status HashJoinProbeOperatorX::prepare(RuntimeState* 
state) {
         }
     }
 
-    const int right_col_idx =
-            (_is_right_semi_anti && !_have_other_join_conjunct) ? 0 : 
_left_table_data_types.size();
+    _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct &&
+                      (!_is_mark_join || _mark_join_conjuncts.empty()))
+                             ? 0
+                             : _left_table_data_types.size();
     size_t idx = 0;
     for (const auto* slot : slots_to_check) {
         auto data_type = slot->get_data_type_ptr();
-        const auto slot_on_left = idx < right_col_idx;
+        const auto slot_on_left = idx < _right_col_idx;
+
+        if (slot_on_left) {
+            if (idx >= _left_table_data_types.size()) {
+                return Status::InternalError(
+                        "Join node(id={}, OP={}) intermediate slot({}, #{})'s 
on left table "
+                        "idx out bound of _left_table_data_types: {} vs {}",
+                        _node_id, _join_op, slot->col_name(), slot->id(), idx,
+                        _left_table_data_types.size());
+            }
+        } else if (idx - _right_col_idx >= _right_table_data_types.size()) {
+            return Status::InternalError(
+                    "Join node(id={}, OP={}) intermediate slot({}, #{})'s on 
right table "
+                    "idx out bound of _right_table_data_types: {} vs {}(idx = 
{}, _right_col_idx = "
+                    "{})",
+                    _node_id, _join_op, slot->col_name(), slot->id(), idx - 
_right_col_idx,
+                    _right_table_data_types.size(), idx, _right_col_idx);
+        }
+
         auto target_data_type = slot_on_left ? _left_table_data_types[idx]
-                                             : _right_table_data_types[idx - 
right_col_idx];
+                                             : _right_table_data_types[idx - 
_right_col_idx];
         ++idx;
         if (data_type->equals(*target_data_type)) {
             continue;
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h 
b/be/src/pipeline/exec/hashjoin_probe_operator.h
index 3c75a8d10de..7b73b3bf321 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -138,6 +138,9 @@ private:
     std::unique_ptr<HashTableCtxVariants> _process_hashtable_ctx_variants =
             std::make_unique<HashTableCtxVariants>();
 
+    // Index of column(slot) from right table in the `_intermediate_row_desc`.
+    size_t _right_col_idx;
+
     RuntimeProfile::Counter* _probe_expr_call_timer = nullptr;
     RuntimeProfile::Counter* _probe_next_timer = nullptr;
     RuntimeProfile::Counter* _probe_side_output_timer = nullptr;
@@ -213,6 +216,9 @@ private:
     std::vector<bool> _right_output_slot_flags;
     std::vector<std::string> _right_table_column_names;
     const std::vector<TExpr> _partition_exprs;
+
+    // Index of column(slot) from right table in the `_intermediate_row_desc`.
+    size_t _right_col_idx;
 };
 
 } // namespace pipeline
diff --git a/be/src/vec/common/hash_table/join_hash_table.h 
b/be/src/vec/common/hash_table/join_hash_table.h
index 317987541cd..88695ef9b43 100644
--- a/be/src/vec/common/hash_table/join_hash_table.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -101,24 +101,23 @@ public:
             }
         }
 
-        if constexpr (with_other_conjuncts ||
-                      (is_mark_join && JoinOpType != 
TJoinOp::RIGHT_SEMI_JOIN)) {
-            if constexpr (!with_other_conjuncts) {
-                constexpr bool is_null_aware_join =
-                        JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
-                        JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
-                constexpr bool is_left_half_join = JoinOpType == 
TJoinOp::LEFT_SEMI_JOIN ||
-                                                   JoinOpType == 
TJoinOp::LEFT_ANTI_JOIN;
-
-                /// For null aware join or left half(semi/anti) join without 
other conjuncts and without
-                /// mark join conjunct.
-                /// If one row on probe side has one match in build side, we 
should stop searching the
-                /// hash table for this row.
-                if (is_null_aware_join || (is_left_half_join && 
!has_mark_join_conjunct)) {
-                    return _find_batch_conjunct<JoinOpType, need_judge_null, 
true>(
-                            keys, build_idx_map, probe_idx, build_idx, 
probe_rows, probe_idxs,
-                            build_idxs);
-                }
+        if constexpr (with_other_conjuncts) {
+            return _find_batch_conjunct<JoinOpType, need_judge_null, false>(
+                    keys, build_idx_map, probe_idx, build_idx, probe_rows, 
probe_idxs, build_idxs);
+        } else if constexpr (is_mark_join) {
+            constexpr bool is_null_aware_join = JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
+                                                JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
+            constexpr bool is_left_half_join =
+                    JoinOpType == TJoinOp::LEFT_SEMI_JOIN || JoinOpType == 
TJoinOp::LEFT_ANTI_JOIN;
+
+            /// For null aware join or left half(semi/anti) join without other 
conjuncts and without
+            /// mark join conjunct.
+            /// If one row on probe side has one match in build side, we 
should stop searching the
+            /// hash table for this row.
+            if (is_null_aware_join || (is_left_half_join && 
!has_mark_join_conjunct)) {
+                return _find_batch_conjunct<JoinOpType, need_judge_null, true>(
+                        keys, build_idx_map, probe_idx, build_idx, probe_rows, 
probe_idxs,
+                        build_idxs);
             }
 
             return _find_batch_conjunct<JoinOpType, need_judge_null, false>(
@@ -339,14 +338,7 @@ private:
 
         auto do_the_probe = [&]() {
             while (build_idx && matched_cnt < batch_size) {
-                if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
-                              JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
-                    if (!visited[build_idx] && keys[probe_idx] == 
build_keys[build_idx]) {
-                        probe_idxs[matched_cnt] = probe_idx;
-                        build_idxs[matched_cnt] = build_idx;
-                        matched_cnt++;
-                    }
-                } else if constexpr (need_judge_null) {
+                if constexpr (need_judge_null) {
                     if (build_idx == bucket_size) {
                         build_idxs[matched_cnt] = build_idx;
                         probe_idxs[matched_cnt] = probe_idx;
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h 
b/be/src/vec/exec/join/process_hash_table_probe.h
index ee46197617a..0228310390c 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -23,6 +23,7 @@
 #include "vec/columns/column.h"
 #include "vec/columns/columns_number.h"
 #include "vec/common/arena.h"
+#include "vec/common/custom_allocator.h"
 
 namespace doris {
 namespace vectorized {
@@ -132,8 +133,15 @@ struct ProcessHashTableProbe {
     RuntimeProfile::Counter* _probe_side_output_timer = nullptr;
     RuntimeProfile::Counter* _probe_process_hashtable_timer = nullptr;
 
-    int _right_col_idx;
+    // See `HashJoinProbeOperatorX::_right_col_idx`
+    const int _right_col_idx;
+
     int _right_col_len;
+
+    // For right semi with mark join conjunct, we need to store the mark join 
flags
+    // in the hash table.
+    // -1 means null, 0 means false, 1 means true
+    DorisVector<int8_t> mark_join_flags;
 };
 
 } // namespace vectorized
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h 
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 9a15df637aa..519239ff172 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -57,9 +57,7 @@ ProcessHashTableProbe<JoinOpType, 
Parent>::ProcessHashTableProbe(Parent* parent,
           _build_side_output_timer(parent->_build_side_output_timer),
           _probe_side_output_timer(parent->_probe_side_output_timer),
           
_probe_process_hashtable_timer(parent->_probe_process_hashtable_timer),
-          _right_col_idx((_is_right_semi_anti && !_have_other_join_conjunct)
-                                 ? 0
-                                 : _parent->left_table_data_types().size()),
+          _right_col_idx(parent->_right_col_idx),
           _right_col_len(_parent->right_table_data_types().size()) {}
 
 template <int JoinOpType, typename Parent>
@@ -67,11 +65,10 @@ void ProcessHashTableProbe<JoinOpType, 
Parent>::build_side_output_column(
         MutableColumns& mcol, const std::vector<bool>& output_slot_flags, int 
size,
         bool have_other_join_conjunct, bool is_mark_join) {
     SCOPED_TIMER(_build_side_output_timer);
-    constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::RIGHT_ANTI_JOIN 
||
-                                       JoinOpType == TJoinOp::RIGHT_SEMI_JOIN 
||
-                                       JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+    constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+                                       JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
                                        JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
-                                       JoinOpType == TJoinOp::LEFT_SEMI_JOIN;
+                                       JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
 
     constexpr auto probe_all =
             JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType == 
TJoinOp::FULL_OUTER_JOIN;
@@ -209,7 +206,7 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_process(HashTableType& hash
                         (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
                          JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
                          JoinOpType == 
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
-                         (is_mark_join && JoinOpType != 
doris::TJoinOp::RIGHT_SEMI_JOIN)));
+                         (is_mark_join)));
     }
 
     auto& mcol = mutable_block.mutable_columns();
@@ -268,8 +265,8 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_process(HashTableType& hash
     build_side_output_column(mcol, *_right_output_slot_flags, current_offset, 
with_other_conjuncts,
                              is_mark_join);
 
-    if constexpr (with_other_conjuncts || (JoinOpType != 
TJoinOp::RIGHT_SEMI_JOIN &&
-                                           JoinOpType != 
TJoinOp::RIGHT_ANTI_JOIN)) {
+    if (with_other_conjuncts || !_parent->_mark_join_conjuncts.empty() ||
+        (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType != 
TJoinOp::RIGHT_ANTI_JOIN)) {
         auto check_all_match_one = [](const std::vector<uint32_t>& vecs, 
uint32_t probe_idx,
                                       int size) {
             if (!size || vecs[0] != probe_idx || vecs[size - 1] != probe_idx + 
size - 1) {
@@ -291,7 +288,7 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_process(HashTableType& hash
 
     output_block->swap(mutable_block.to_block());
 
-    if constexpr (is_mark_join && JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) {
+    if constexpr (is_mark_join) {
         return do_mark_join_conjuncts<with_other_conjuncts>(
                 output_block, hash_table_ctx.hash_table->get_bucket_size());
     } else if constexpr (with_other_conjuncts) {
@@ -363,21 +360,31 @@ template <int JoinOpType, typename Parent>
 template <bool with_other_conjuncts>
 Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
         Block* output_block, size_t hash_table_bucket_size) {
-    DCHECK(JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
-           JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
-           JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
-           JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN);
+    if (JoinOpType != TJoinOp::LEFT_ANTI_JOIN && JoinOpType != 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
+        JoinOpType != TJoinOp::LEFT_SEMI_JOIN && JoinOpType != 
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN &&
+        JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType != 
TJoinOp::RIGHT_ANTI_JOIN) {
+        return Status::InternalError("join type {} is not supported", 
JoinOpType);
+    }
 
     constexpr bool is_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
-                                  JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
+                                  JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
+                                  JoinOpType == TJoinOp::RIGHT_ANTI_JOIN;
     constexpr bool is_null_aware_join = JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN ||
                                         JoinOpType == 
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
+    constexpr bool is_right_half_join =
+            JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || JoinOpType == 
TJoinOp::RIGHT_ANTI_JOIN;
 
     const auto row_count = output_block->rows();
     if (!row_count) {
         return Status::OK();
     }
 
+    if constexpr (is_right_half_join) {
+        if (mark_join_flags.empty() && _build_block != nullptr) {
+            mark_join_flags.resize(_build_block->rows(), 0);
+        }
+    }
+
     auto mark_column_mutable =
             
output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable();
     auto& mark_column = assert_cast<ColumnNullable&>(*mark_column_mutable);
@@ -455,36 +462,70 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::do_mark_join_conjuncts(
     const bool should_be_null_if_build_side_has_null =
             *_has_null_in_build_side && is_null_aware_join && 
!with_other_conjuncts;
     for (size_t i = 0; i != row_count; ++i) {
-        bool not_matched_before = _parent->_last_probe_match != 
_probe_indexs[i];
-        if (_build_indexs[i] == 0) {
-            bool has_null_mark_value = _parent->_last_probe_null_mark == 
_probe_indexs[i];
-            if (not_matched_before) {
-                filter_map[i] = true;
-                mark_null_map[i] = has_null_mark_value || 
should_be_null_if_build_side_has_null;
-                mark_filter_data[i] = false;
+        if constexpr (is_right_half_join) {
+            const auto& build_index = _build_indexs[i];
+            if (build_index == 0) {
+                continue;
+            }
+
+            if (mark_join_flags[build_index] == 1) {
+                continue;
+            }
+
+            if (mark_null_map[i]) {
+                mark_join_flags[build_index] = -1;
+            } else if (mark_filter_data[i]) {
+                mark_join_flags[build_index] = 1;
             }
         } else {
-            if (mark_null_map[i]) { // is null
-                _parent->_last_probe_null_mark = _probe_indexs[i];
-            } else {
-                if (mark_filter_data[i] && not_matched_before) {
-                    _parent->_last_probe_match = _probe_indexs[i];
+            bool not_matched_before = _parent->_last_probe_match != 
_probe_indexs[i];
+            if (_build_indexs[i] == 0) {
+                bool has_null_mark_value = _parent->_last_probe_null_mark == 
_probe_indexs[i];
+                if (not_matched_before) {
                     filter_map[i] = true;
+                    mark_null_map[i] = has_null_mark_value || 
should_be_null_if_build_side_has_null;
+                    mark_filter_data[i] = false;
+                }
+            } else {
+                if (mark_null_map[i]) { // is null
+                    _parent->_last_probe_null_mark = _probe_indexs[i];
+                } else {
+                    if (mark_filter_data[i] && not_matched_before) {
+                        _parent->_last_probe_match = _probe_indexs[i];
+                        filter_map[i] = true;
+                    }
                 }
             }
         }
     }
 
-    if constexpr (is_anti_join) {
-        // flip the mark column
-        for (size_t i = 0; i != row_count; ++i) {
-            mark_filter_data[i] ^= 1; // not null/ null
+    if constexpr (is_right_half_join) {
+        if constexpr (is_anti_join) {
+            // flip the mark column
+            for (size_t i = 0; i != row_count; ++i) {
+                if (mark_join_flags[i] == -1) {
+                    // -1 means null.
+                    continue;
+                }
+
+                mark_join_flags[i] ^= 1;
+            }
+        }
+        // For right semi/anti join, no rows will be output in probe phase.
+        output_block->swap(vectorized::Block());
+        return Status::OK();
+    } else {
+        if constexpr (is_anti_join) {
+            // flip the mark column
+            for (size_t i = 0; i != row_count; ++i) {
+                mark_filter_data[i] ^= 1; // not null/ null
+            }
         }
-    }
 
-    auto result_column_id = output_block->columns();
-    output_block->insert({std::move(filter_column), 
std::make_shared<DataTypeUInt8>(), ""});
-    return Block::filter_block(output_block, result_column_id, 
result_column_id);
+        auto result_column_id = output_block->columns();
+        output_block->insert({std::move(filter_column), 
std::make_shared<DataTypeUInt8>(), ""});
+        return Block::filter_block(output_block, result_column_id, 
result_column_id);
+    }
 }
 
 template <int JoinOpType, typename Parent>
@@ -638,8 +679,31 @@ Status ProcessHashTableProbe<JoinOpType, 
Parent>::process_data_in_hashtable(
                                                           _build_indexs.data() 
+ block_size);
         }
 
+        if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
+            if (is_mark_join) {
+                if (mark_join_flags.empty() && _build_block != nullptr) {
+                    mark_join_flags.resize(_build_block->rows(), 0);
+                }
+
+                // mark column is nullable
+                auto* mark_column = assert_cast<vectorized::ColumnNullable*>(
+                        mcol[_parent->_mark_column_id].get());
+                mark_column->resize(block_size);
+                auto* null_map = mark_column->get_null_map_data().data();
+                auto* data = 
assert_cast<vectorized::ColumnUInt8&>(mark_column->get_nested_column())
+                                     .get_data()
+                                     .data();
+                for (size_t i = 0; i != block_size; ++i) {
+                    const auto build_index = _build_indexs[i];
+                    null_map[i] = mark_join_flags[build_index] == -1;
+                    data[i] = mark_join_flags[build_index] == 1;
+                }
+            }
+        }
+
         // just resize the left table column in case with other conjunct to 
make block size is not zero
-        if (_is_right_semi_anti && _have_other_join_conjunct) {
+        if (_is_right_semi_anti && _right_col_idx != 0) {
             for (int i = 0; i < _right_col_idx; ++i) {
                 mcol[i]->resize(block_size);
             }
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index 9fec942d161..410b5ddf7ed 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -292,6 +292,11 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     _right_table_data_types = 
VectorizedUtils::get_data_types(child(1)->row_desc());
     _left_table_data_types = 
VectorizedUtils::get_data_types(child(0)->row_desc());
     _right_table_column_names = 
VectorizedUtils::get_column_names(child(1)->row_desc());
+
+    _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct &&
+                      (!_is_mark_join || _mark_join_conjuncts.empty()))
+                             ? 0
+                             : _left_table_data_types.size();
     // Hash Table Init
     _hash_table_init(state);
     _construct_mutable_join_block();
diff --git a/be/src/vec/exec/join/vhash_join_node.h 
b/be/src/vec/exec/join/vhash_join_node.h
index 5cfac9f9aba..74b1428fc8c 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -358,6 +358,9 @@ private:
     int64_t _build_side_last_mem_used = 0;
     MutableBlock _build_side_mutable_block;
 
+    // Index of column(slot) from right table in the `_intermediate_row_desc`.
+    size_t _right_col_idx;
+
     SharedHashTableContextPtr _shared_hash_table_context = nullptr;
 
     Status _materialize_build_side(RuntimeState* state) override;
diff --git a/regression-test/data/query_p0/join/mark_join/mark_join.out 
b/regression-test/data/query_p0/join/mark_join/mark_join.out
index ed3575d0e14..ea3b7ddd6a6 100644
Binary files a/regression-test/data/query_p0/join/mark_join/mark_join.out and 
b/regression-test/data/query_p0/join/mark_join/mark_join.out differ
diff --git a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy 
b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
index 9759a0e9b4c..0292fd4ae30 100644
--- a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
+++ b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
@@ -61,4 +61,218 @@ suite("mark_join") {
     qt_test """
     select * from t1 where t1.k1 not in (select t2.k3 from t2 where t2.k2 = 
t1.k2) or k1 < 10 order by k1, k2;
     """
+
+    sql "drop table if exists tbl1;"
+    sql "drop table if exists tbl2;"
+    sql "drop table if exists tbl3;"
+
+    sql """
+        CREATE TABLE `tbl1` (
+            `unit_name` varchar(1080) NULL,
+            `cur_unit_name` varchar(1080) NOT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`unit_name`)
+        DISTRIBUTED BY RANDOM BUCKETS AUTO
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        CREATE TABLE `tbl2` (
+            `org_code` varchar(150) NOT NULL ,
+            `org_name` varchar(300) NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`org_code`)
+        DISTRIBUTED BY HASH(`org_code`) BUCKETS 4
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        CREATE TABLE `tbl3` (
+            `id` bigint NOT NULL,
+            `acntm_name` varchar(500) NULL ,
+            `vendor_name` varchar(500) NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        insert into tbl1 (unit_name, cur_unit_name) values
+            ('v1', 'o1'),
+            ('v2', 'o2'),
+            ('v3', 'o3'),
+            ('v4', 'o4'),
+            ('v5', 'o5'),
+            (null, 'o1'),
+            ('v1', 'o1'),
+            ('v2', 'o2'),
+            ('v3', 'o3'),
+            ('v4', 'o4'),
+            ('v5', 'o5'),
+            (null, 'o1'),
+            (null, 'o2'),
+            (null, 'o3'),
+            (null, 'o4'),
+            (null, 'o5'),
+            ('v1', 'o1'),
+            ('v2', 'o2'),
+            ('v3', 'o3'),
+            ('v4', 'o4'),
+            ('v5', 'o5');
+    """
+
+    sql """
+        insert into tbl2(org_code, org_name) values
+            ('v1', 'o1'),
+            ('v2', 'o2'),
+            ('v3', 'o3'),
+            ('v4', 'o4'),
+            ('v5', 'o5'),
+            ('v1', null),
+            ('v2', null),
+            ('v3', null),
+            ('v4', null),
+            ('v5', null);
+    """
+
+    sql """
+        insert into tbl3 (id, vendor_name, acntm_name)
+            values(1, 'o1', 'v1'),
+            (2, 'o2', 'v2'),
+            (3, 'o3', 'v3'),
+            (4, 'o4', 'v4'),
+            (5, 'o5', 'v5'),
+            (6, null, 'v1'),
+            (7, null, 'v2'),
+            (8, null, 'v3'),
+            (9, null, 'v4'),
+            (10, null, 'v5');
+    """
+
+    sql " analyze table tbl1 with sync;"
+    sql " analyze table tbl2 with sync;"
+    sql " analyze table tbl3 with sync;"
+
+    sql "set disable_join_reorder=0;"
+    qt_test_right_semi_mark_join """
+        select
+            tbl3.id,
+            tbl3.acntm_name,
+            tbl3.vendor_name,
+            tbl3.vendor_name in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+            ) v1,
+            tbl3.vendor_name not in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+            ) v2
+        from
+            tbl3 order by 1,2,3,4,5;
+    """
+
+    sql "set disable_join_reorder=1;"
+    qt_test_right_semi_mark_join_2 """
+        select
+            tbl3.id,
+            tbl3.acntm_name,
+            tbl3.vendor_name,
+            tbl3.vendor_name in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+            ) v1,
+            tbl3.vendor_name not in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+            ) v2
+        from
+            tbl3 order by 1,2,3,4,5;
+    """
+
+    sql "set disable_join_reorder=0;"
+    qt_test_right_semi_mark_join_no_null """
+        select
+            tbl3.id,
+            tbl3.acntm_name,
+            tbl3.vendor_name,
+            tbl3.vendor_name in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+                    and tbl1.unit_name is not null
+            ) v1,
+            tbl3.vendor_name not in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+                    and tbl1.unit_name is not null
+            ) v2
+        from
+            tbl3 order by 1,2,3,4,5;
+    """
+
+    sql "set disable_join_reorder=1;"
+    qt_test_right_semi_mark_join_no_null_2 """
+        select
+            tbl3.id,
+            tbl3.acntm_name,
+            tbl3.vendor_name,
+            tbl3.vendor_name in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+                    and tbl1.unit_name is not null
+            ) v1,
+            tbl3.vendor_name not in (
+                select
+                    tbl1.unit_name
+                from
+                    tbl2
+                    join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+                where
+                    tbl2.org_code = tbl3.acntm_name
+                    and tbl1.unit_name is not null
+            ) v2
+        from
+            tbl3 order by 1,2,3,4,5;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to