This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 51b39d09921 [fix](join)Consider mark join when computing
right_col_idx(#50720) (#50727)
51b39d09921 is described below
commit 51b39d099219a3354c4571b7dce9b23125572820
Author: Jerry Hu <[email protected]>
AuthorDate: Mon May 19 14:42:15 2025 +0800
[fix](join)Consider mark join when computing right_col_idx(#50720) (#50727)
---
be/src/pipeline/exec/hashjoin_probe_operator.cpp | 31 ++-
be/src/pipeline/exec/hashjoin_probe_operator.h | 6 +
be/src/vec/common/hash_table/join_hash_table.h | 44 ++---
be/src/vec/exec/join/process_hash_table_probe.h | 10 +-
.../vec/exec/join/process_hash_table_probe_impl.h | 138 +++++++++----
be/src/vec/exec/join/vhash_join_node.cpp | 5 +
be/src/vec/exec/join/vhash_join_node.h | 3 +
.../data/query_p0/join/mark_join/mark_join.out | Bin 239 -> 1007 bytes
.../query_p0/join/mark_join/mark_join.groovy | 214 +++++++++++++++++++++
9 files changed, 382 insertions(+), 69 deletions(-)
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
index a98c7215771..85280c8295c 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp
@@ -78,6 +78,7 @@ Status HashJoinProbeLocalState::open(RuntimeState* state) {
_process_hashtable_ctx_variants = std::make_unique<HashTableCtxVariants>();
auto& p = _parent->cast<HashJoinProbeOperatorX>();
+ _right_col_idx = p._right_col_idx;
std::visit(
[&](auto&& join_op_variants, auto have_other_join_conjunct) {
using JoinOpType = std::decay_t<decltype(join_op_variants)>;
@@ -536,7 +537,7 @@ Status HashJoinProbeOperatorX::init(const TPlanNode& tnode,
RuntimeState* state)
const bool probe_dispose_null =
_match_all_probe || _build_unique || _join_op ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
_join_op == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN || _join_op ==
TJoinOp::LEFT_ANTI_JOIN ||
- _join_op == TJoinOp::LEFT_SEMI_JOIN;
+ _join_op == TJoinOp::LEFT_SEMI_JOIN || _is_mark_join;
const std::vector<TEqJoinCondition>& eq_join_conjuncts =
tnode.hash_join_node.eq_join_conjuncts;
std::vector<bool> probe_not_ignore_null(eq_join_conjuncts.size());
size_t conjuncts_index = 0;
@@ -668,14 +669,34 @@ Status HashJoinProbeOperatorX::prepare(RuntimeState*
state) {
}
}
- const int right_col_idx =
- (_is_right_semi_anti && !_have_other_join_conjunct) ? 0 :
_left_table_data_types.size();
+ _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct &&
+ (!_is_mark_join || _mark_join_conjuncts.empty()))
+ ? 0
+ : _left_table_data_types.size();
size_t idx = 0;
for (const auto* slot : slots_to_check) {
auto data_type = slot->get_data_type_ptr();
- const auto slot_on_left = idx < right_col_idx;
+ const auto slot_on_left = idx < _right_col_idx;
+
+ if (slot_on_left) {
+ if (idx >= _left_table_data_types.size()) {
+ return Status::InternalError(
+ "Join node(id={}, OP={}) intermediate slot({}, #{})'s
on left table "
+ "idx out bound of _left_table_data_types: {} vs {}",
+ _node_id, _join_op, slot->col_name(), slot->id(), idx,
+ _left_table_data_types.size());
+ }
+ } else if (idx - _right_col_idx >= _right_table_data_types.size()) {
+ return Status::InternalError(
+ "Join node(id={}, OP={}) intermediate slot({}, #{})'s on
right table "
+ "idx out bound of _right_table_data_types: {} vs {}(idx =
{}, _right_col_idx = "
+ "{})",
+ _node_id, _join_op, slot->col_name(), slot->id(), idx -
_right_col_idx,
+ _right_table_data_types.size(), idx, _right_col_idx);
+ }
+
auto target_data_type = slot_on_left ? _left_table_data_types[idx]
- : _right_table_data_types[idx -
right_col_idx];
+ : _right_table_data_types[idx -
_right_col_idx];
++idx;
if (data_type->equals(*target_data_type)) {
continue;
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h
b/be/src/pipeline/exec/hashjoin_probe_operator.h
index 3c75a8d10de..7b73b3bf321 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -138,6 +138,9 @@ private:
std::unique_ptr<HashTableCtxVariants> _process_hashtable_ctx_variants =
std::make_unique<HashTableCtxVariants>();
+ // Index of column(slot) from right table in the `_intermediate_row_desc`.
+ size_t _right_col_idx;
+
RuntimeProfile::Counter* _probe_expr_call_timer = nullptr;
RuntimeProfile::Counter* _probe_next_timer = nullptr;
RuntimeProfile::Counter* _probe_side_output_timer = nullptr;
@@ -213,6 +216,9 @@ private:
std::vector<bool> _right_output_slot_flags;
std::vector<std::string> _right_table_column_names;
const std::vector<TExpr> _partition_exprs;
+
+ // Index of column(slot) from right table in the `_intermediate_row_desc`.
+ size_t _right_col_idx;
};
} // namespace pipeline
diff --git a/be/src/vec/common/hash_table/join_hash_table.h
b/be/src/vec/common/hash_table/join_hash_table.h
index 317987541cd..88695ef9b43 100644
--- a/be/src/vec/common/hash_table/join_hash_table.h
+++ b/be/src/vec/common/hash_table/join_hash_table.h
@@ -101,24 +101,23 @@ public:
}
}
- if constexpr (with_other_conjuncts ||
- (is_mark_join && JoinOpType !=
TJoinOp::RIGHT_SEMI_JOIN)) {
- if constexpr (!with_other_conjuncts) {
- constexpr bool is_null_aware_join =
- JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
- JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
- constexpr bool is_left_half_join = JoinOpType ==
TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType ==
TJoinOp::LEFT_ANTI_JOIN;
-
- /// For null aware join or left half(semi/anti) join without
other conjuncts and without
- /// mark join conjunct.
- /// If one row on probe side has one match in build side, we
should stop searching the
- /// hash table for this row.
- if (is_null_aware_join || (is_left_half_join &&
!has_mark_join_conjunct)) {
- return _find_batch_conjunct<JoinOpType, need_judge_null,
true>(
- keys, build_idx_map, probe_idx, build_idx,
probe_rows, probe_idxs,
- build_idxs);
- }
+ if constexpr (with_other_conjuncts) {
+ return _find_batch_conjunct<JoinOpType, need_judge_null, false>(
+ keys, build_idx_map, probe_idx, build_idx, probe_rows,
probe_idxs, build_idxs);
+ } else if constexpr (is_mark_join) {
+ constexpr bool is_null_aware_join = JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
+ JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
+ constexpr bool is_left_half_join =
+ JoinOpType == TJoinOp::LEFT_SEMI_JOIN || JoinOpType ==
TJoinOp::LEFT_ANTI_JOIN;
+
+ /// For null aware join or left half(semi/anti) join without other
conjuncts and without
+ /// mark join conjunct.
+ /// If one row on probe side has one match in build side, we
should stop searching the
+ /// hash table for this row.
+ if (is_null_aware_join || (is_left_half_join &&
!has_mark_join_conjunct)) {
+ return _find_batch_conjunct<JoinOpType, need_judge_null, true>(
+ keys, build_idx_map, probe_idx, build_idx, probe_rows,
probe_idxs,
+ build_idxs);
}
return _find_batch_conjunct<JoinOpType, need_judge_null, false>(
@@ -339,14 +338,7 @@ private:
auto do_the_probe = [&]() {
while (build_idx && matched_cnt < batch_size) {
- if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
- JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
- if (!visited[build_idx] && keys[probe_idx] ==
build_keys[build_idx]) {
- probe_idxs[matched_cnt] = probe_idx;
- build_idxs[matched_cnt] = build_idx;
- matched_cnt++;
- }
- } else if constexpr (need_judge_null) {
+ if constexpr (need_judge_null) {
if (build_idx == bucket_size) {
build_idxs[matched_cnt] = build_idx;
probe_idxs[matched_cnt] = probe_idx;
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h
b/be/src/vec/exec/join/process_hash_table_probe.h
index ee46197617a..0228310390c 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -23,6 +23,7 @@
#include "vec/columns/column.h"
#include "vec/columns/columns_number.h"
#include "vec/common/arena.h"
+#include "vec/common/custom_allocator.h"
namespace doris {
namespace vectorized {
@@ -132,8 +133,15 @@ struct ProcessHashTableProbe {
RuntimeProfile::Counter* _probe_side_output_timer = nullptr;
RuntimeProfile::Counter* _probe_process_hashtable_timer = nullptr;
- int _right_col_idx;
+ // See `HashJoinProbeOperatorX::_right_col_idx`
+ const int _right_col_idx;
+
int _right_col_len;
+
+ // For right semi with mark join conjunct, we need to store the mark join
flags
+ // in the hash table.
+ // -1 means null, 0 means false, 1 means true
+ DorisVector<int8_t> mark_join_flags;
};
} // namespace vectorized
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 9a15df637aa..519239ff172 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -57,9 +57,7 @@ ProcessHashTableProbe<JoinOpType,
Parent>::ProcessHashTableProbe(Parent* parent,
_build_side_output_timer(parent->_build_side_output_timer),
_probe_side_output_timer(parent->_probe_side_output_timer),
_probe_process_hashtable_timer(parent->_probe_process_hashtable_timer),
- _right_col_idx((_is_right_semi_anti && !_have_other_join_conjunct)
- ? 0
- : _parent->left_table_data_types().size()),
+ _right_col_idx(parent->_right_col_idx),
_right_col_len(_parent->right_table_data_types().size()) {}
template <int JoinOpType, typename Parent>
@@ -67,11 +65,10 @@ void ProcessHashTableProbe<JoinOpType,
Parent>::build_side_output_column(
MutableColumns& mcol, const std::vector<bool>& output_slot_flags, int
size,
bool have_other_join_conjunct, bool is_mark_join) {
SCOPED_TIMER(_build_side_output_timer);
- constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::RIGHT_ANTI_JOIN
||
- JoinOpType == TJoinOp::RIGHT_SEMI_JOIN
||
- JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+ constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
- JoinOpType == TJoinOp::LEFT_SEMI_JOIN;
+ JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN;
constexpr auto probe_all =
JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType ==
TJoinOp::FULL_OUTER_JOIN;
@@ -209,7 +206,7 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::do_process(HashTableType& hash
(JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ||
JoinOpType ==
doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
- (is_mark_join && JoinOpType !=
doris::TJoinOp::RIGHT_SEMI_JOIN)));
+ (is_mark_join)));
}
auto& mcol = mutable_block.mutable_columns();
@@ -268,8 +265,8 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::do_process(HashTableType& hash
build_side_output_column(mcol, *_right_output_slot_flags, current_offset,
with_other_conjuncts,
is_mark_join);
- if constexpr (with_other_conjuncts || (JoinOpType !=
TJoinOp::RIGHT_SEMI_JOIN &&
- JoinOpType !=
TJoinOp::RIGHT_ANTI_JOIN)) {
+ if (with_other_conjuncts || !_parent->_mark_join_conjuncts.empty() ||
+ (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType !=
TJoinOp::RIGHT_ANTI_JOIN)) {
auto check_all_match_one = [](const std::vector<uint32_t>& vecs,
uint32_t probe_idx,
int size) {
if (!size || vecs[0] != probe_idx || vecs[size - 1] != probe_idx +
size - 1) {
@@ -291,7 +288,7 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::do_process(HashTableType& hash
output_block->swap(mutable_block.to_block());
- if constexpr (is_mark_join && JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) {
+ if constexpr (is_mark_join) {
return do_mark_join_conjuncts<with_other_conjuncts>(
output_block, hash_table_ctx.hash_table->get_bucket_size());
} else if constexpr (with_other_conjuncts) {
@@ -363,21 +360,31 @@ template <int JoinOpType, typename Parent>
template <bool with_other_conjuncts>
Status ProcessHashTableProbe<JoinOpType, Parent>::do_mark_join_conjuncts(
Block* output_block, size_t hash_table_bucket_size) {
- DCHECK(JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
- JoinOpType == TJoinOp::LEFT_SEMI_JOIN ||
- JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN);
+ if (JoinOpType != TJoinOp::LEFT_ANTI_JOIN && JoinOpType !=
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
+ JoinOpType != TJoinOp::LEFT_SEMI_JOIN && JoinOpType !=
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN &&
+ JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType !=
TJoinOp::RIGHT_ANTI_JOIN) {
+ return Status::InternalError("join type {} is not supported",
JoinOpType);
+ }
constexpr bool is_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN ||
- JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
+ JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_ANTI_JOIN;
constexpr bool is_null_aware_join = JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN ||
JoinOpType ==
TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
+ constexpr bool is_right_half_join =
+ JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || JoinOpType ==
TJoinOp::RIGHT_ANTI_JOIN;
const auto row_count = output_block->rows();
if (!row_count) {
return Status::OK();
}
+ if constexpr (is_right_half_join) {
+ if (mark_join_flags.empty() && _build_block != nullptr) {
+ mark_join_flags.resize(_build_block->rows(), 0);
+ }
+ }
+
auto mark_column_mutable =
output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable();
auto& mark_column = assert_cast<ColumnNullable&>(*mark_column_mutable);
@@ -455,36 +462,70 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::do_mark_join_conjuncts(
const bool should_be_null_if_build_side_has_null =
*_has_null_in_build_side && is_null_aware_join &&
!with_other_conjuncts;
for (size_t i = 0; i != row_count; ++i) {
- bool not_matched_before = _parent->_last_probe_match !=
_probe_indexs[i];
- if (_build_indexs[i] == 0) {
- bool has_null_mark_value = _parent->_last_probe_null_mark ==
_probe_indexs[i];
- if (not_matched_before) {
- filter_map[i] = true;
- mark_null_map[i] = has_null_mark_value ||
should_be_null_if_build_side_has_null;
- mark_filter_data[i] = false;
+ if constexpr (is_right_half_join) {
+ const auto& build_index = _build_indexs[i];
+ if (build_index == 0) {
+ continue;
+ }
+
+ if (mark_join_flags[build_index] == 1) {
+ continue;
+ }
+
+ if (mark_null_map[i]) {
+ mark_join_flags[build_index] = -1;
+ } else if (mark_filter_data[i]) {
+ mark_join_flags[build_index] = 1;
}
} else {
- if (mark_null_map[i]) { // is null
- _parent->_last_probe_null_mark = _probe_indexs[i];
- } else {
- if (mark_filter_data[i] && not_matched_before) {
- _parent->_last_probe_match = _probe_indexs[i];
+ bool not_matched_before = _parent->_last_probe_match !=
_probe_indexs[i];
+ if (_build_indexs[i] == 0) {
+ bool has_null_mark_value = _parent->_last_probe_null_mark ==
_probe_indexs[i];
+ if (not_matched_before) {
filter_map[i] = true;
+ mark_null_map[i] = has_null_mark_value ||
should_be_null_if_build_side_has_null;
+ mark_filter_data[i] = false;
+ }
+ } else {
+ if (mark_null_map[i]) { // is null
+ _parent->_last_probe_null_mark = _probe_indexs[i];
+ } else {
+ if (mark_filter_data[i] && not_matched_before) {
+ _parent->_last_probe_match = _probe_indexs[i];
+ filter_map[i] = true;
+ }
}
}
}
}
- if constexpr (is_anti_join) {
- // flip the mark column
- for (size_t i = 0; i != row_count; ++i) {
- mark_filter_data[i] ^= 1; // not null/ null
+ if constexpr (is_right_half_join) {
+ if constexpr (is_anti_join) {
+ // flip the mark column
+ for (size_t i = 0; i != row_count; ++i) {
+ if (mark_join_flags[i] == -1) {
+ // -1 means null.
+ continue;
+ }
+
+ mark_join_flags[i] ^= 1;
+ }
+ }
+ // For right semi/anti join, no rows will be output in probe phase.
+ output_block->swap(vectorized::Block());
+ return Status::OK();
+ } else {
+ if constexpr (is_anti_join) {
+ // flip the mark column
+ for (size_t i = 0; i != row_count; ++i) {
+ mark_filter_data[i] ^= 1; // not null/ null
+ }
}
- }
- auto result_column_id = output_block->columns();
- output_block->insert({std::move(filter_column),
std::make_shared<DataTypeUInt8>(), ""});
- return Block::filter_block(output_block, result_column_id,
result_column_id);
+ auto result_column_id = output_block->columns();
+ output_block->insert({std::move(filter_column),
std::make_shared<DataTypeUInt8>(), ""});
+ return Block::filter_block(output_block, result_column_id,
result_column_id);
+ }
}
template <int JoinOpType, typename Parent>
@@ -638,8 +679,31 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::process_data_in_hashtable(
_build_indexs.data()
+ block_size);
}
+ if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
+ if (is_mark_join) {
+ if (mark_join_flags.empty() && _build_block != nullptr) {
+ mark_join_flags.resize(_build_block->rows(), 0);
+ }
+
+ // mark column is nullable
+ auto* mark_column = assert_cast<vectorized::ColumnNullable*>(
+ mcol[_parent->_mark_column_id].get());
+ mark_column->resize(block_size);
+ auto* null_map = mark_column->get_null_map_data().data();
+ auto* data =
assert_cast<vectorized::ColumnUInt8&>(mark_column->get_nested_column())
+ .get_data()
+ .data();
+ for (size_t i = 0; i != block_size; ++i) {
+ const auto build_index = _build_indexs[i];
+ null_map[i] = mark_join_flags[build_index] == -1;
+ data[i] = mark_join_flags[build_index] == 1;
+ }
+ }
+ }
+
// just resize the left table column in case with other conjunct to
make block size is not zero
- if (_is_right_semi_anti && _have_other_join_conjunct) {
+ if (_is_right_semi_anti && _right_col_idx != 0) {
for (int i = 0; i < _right_col_idx; ++i) {
mcol[i]->resize(block_size);
}
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp
b/be/src/vec/exec/join/vhash_join_node.cpp
index 9fec942d161..410b5ddf7ed 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -292,6 +292,11 @@ Status HashJoinNode::prepare(RuntimeState* state) {
_right_table_data_types =
VectorizedUtils::get_data_types(child(1)->row_desc());
_left_table_data_types =
VectorizedUtils::get_data_types(child(0)->row_desc());
_right_table_column_names =
VectorizedUtils::get_column_names(child(1)->row_desc());
+
+ _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct &&
+ (!_is_mark_join || _mark_join_conjuncts.empty()))
+ ? 0
+ : _left_table_data_types.size();
// Hash Table Init
_hash_table_init(state);
_construct_mutable_join_block();
diff --git a/be/src/vec/exec/join/vhash_join_node.h
b/be/src/vec/exec/join/vhash_join_node.h
index 5cfac9f9aba..74b1428fc8c 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -358,6 +358,9 @@ private:
int64_t _build_side_last_mem_used = 0;
MutableBlock _build_side_mutable_block;
+ // Index of column(slot) from right table in the `_intermediate_row_desc`.
+ size_t _right_col_idx;
+
SharedHashTableContextPtr _shared_hash_table_context = nullptr;
Status _materialize_build_side(RuntimeState* state) override;
diff --git a/regression-test/data/query_p0/join/mark_join/mark_join.out
b/regression-test/data/query_p0/join/mark_join/mark_join.out
index ed3575d0e14..ea3b7ddd6a6 100644
Binary files a/regression-test/data/query_p0/join/mark_join/mark_join.out and
b/regression-test/data/query_p0/join/mark_join/mark_join.out differ
diff --git a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
index 9759a0e9b4c..0292fd4ae30 100644
--- a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
+++ b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy
@@ -61,4 +61,218 @@ suite("mark_join") {
qt_test """
select * from t1 where t1.k1 not in (select t2.k3 from t2 where t2.k2 =
t1.k2) or k1 < 10 order by k1, k2;
"""
+
+ sql "drop table if exists tbl1;"
+ sql "drop table if exists tbl2;"
+ sql "drop table if exists tbl3;"
+
+ sql """
+ CREATE TABLE `tbl1` (
+ `unit_name` varchar(1080) NULL,
+ `cur_unit_name` varchar(1080) NOT NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`unit_name`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ CREATE TABLE `tbl2` (
+ `org_code` varchar(150) NOT NULL ,
+ `org_name` varchar(300) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`org_code`)
+ DISTRIBUTED BY HASH(`org_code`) BUCKETS 4
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ CREATE TABLE `tbl3` (
+ `id` bigint NOT NULL,
+ `acntm_name` varchar(500) NULL ,
+ `vendor_name` varchar(500) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into tbl1 (unit_name, cur_unit_name) values
+ ('v1', 'o1'),
+ ('v2', 'o2'),
+ ('v3', 'o3'),
+ ('v4', 'o4'),
+ ('v5', 'o5'),
+ (null, 'o1'),
+ ('v1', 'o1'),
+ ('v2', 'o2'),
+ ('v3', 'o3'),
+ ('v4', 'o4'),
+ ('v5', 'o5'),
+ (null, 'o1'),
+ (null, 'o2'),
+ (null, 'o3'),
+ (null, 'o4'),
+ (null, 'o5'),
+ ('v1', 'o1'),
+ ('v2', 'o2'),
+ ('v3', 'o3'),
+ ('v4', 'o4'),
+ ('v5', 'o5');
+ """
+
+ sql """
+ insert into tbl2(org_code, org_name) values
+ ('v1', 'o1'),
+ ('v2', 'o2'),
+ ('v3', 'o3'),
+ ('v4', 'o4'),
+ ('v5', 'o5'),
+ ('v1', null),
+ ('v2', null),
+ ('v3', null),
+ ('v4', null),
+ ('v5', null);
+ """
+
+ sql """
+ insert into tbl3 (id, vendor_name, acntm_name)
+ values(1, 'o1', 'v1'),
+ (2, 'o2', 'v2'),
+ (3, 'o3', 'v3'),
+ (4, 'o4', 'v4'),
+ (5, 'o5', 'v5'),
+ (6, null, 'v1'),
+ (7, null, 'v2'),
+ (8, null, 'v3'),
+ (9, null, 'v4'),
+ (10, null, 'v5');
+ """
+
+ sql " analyze table tbl1 with sync;"
+ sql " analyze table tbl2 with sync;"
+ sql " analyze table tbl3 with sync;"
+
+ sql "set disable_join_reorder=0;"
+ qt_test_right_semi_mark_join """
+ select
+ tbl3.id,
+ tbl3.acntm_name,
+ tbl3.vendor_name,
+ tbl3.vendor_name in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ ) v1,
+ tbl3.vendor_name not in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ ) v2
+ from
+ tbl3 order by 1,2,3,4,5;
+ """
+
+ sql "set disable_join_reorder=1;"
+ qt_test_right_semi_mark_join_2 """
+ select
+ tbl3.id,
+ tbl3.acntm_name,
+ tbl3.vendor_name,
+ tbl3.vendor_name in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ ) v1,
+ tbl3.vendor_name not in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ ) v2
+ from
+ tbl3 order by 1,2,3,4,5;
+ """
+
+ sql "set disable_join_reorder=0;"
+ qt_test_right_semi_mark_join_no_null """
+ select
+ tbl3.id,
+ tbl3.acntm_name,
+ tbl3.vendor_name,
+ tbl3.vendor_name in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ and tbl1.unit_name is not null
+ ) v1,
+ tbl3.vendor_name not in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ and tbl1.unit_name is not null
+ ) v2
+ from
+ tbl3 order by 1,2,3,4,5;
+ """
+
+ sql "set disable_join_reorder=1;"
+ qt_test_right_semi_mark_join_no_null_2 """
+ select
+ tbl3.id,
+ tbl3.acntm_name,
+ tbl3.vendor_name,
+ tbl3.vendor_name in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ and tbl1.unit_name is not null
+ ) v1,
+ tbl3.vendor_name not in (
+ select
+ tbl1.unit_name
+ from
+ tbl2
+ join tbl1 on tbl1.cur_unit_name = tbl2.org_name
+ where
+ tbl2.org_code = tbl3.acntm_name
+ and tbl1.unit_name is not null
+ ) v2
+ from
+ tbl3 order by 1,2,3,4,5;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]