This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 81d77fb05c9 [fix](join) Should not use the build block's size to
resize mark_join_flags (#50993)
81d77fb05c9 is described below
commit 81d77fb05c976327d5df7893277eae3163846062
Author: Jerry Hu <[email protected]>
AuthorDate: Tue May 20 17:35:27 2025 +0800
[fix](join) Should not use the build block's size to resize mark_join_flags
(#50993)
### What problem does this PR solve?
Introduced by #51050
The build block may already have been cleared by `clear_column_mem_not_keep`
in the build phase when the operator is closed, so its row count should not be
used to resize `mark_join_flags`.
```cpp
Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status
exec_status) {
if (_closed) {
return Status::OK();
}
auto& p = _parent->cast<HashJoinBuildSinkOperatorX>();
Defer defer {[&]() {
if (!_should_build_hash_table) {
return;
}
// The build side hash key column maybe no need output, but we need
to keep the column in block
// because it is used to compare with probe side hash key column
if (p._should_keep_hash_key_column && _build_col_ids.size() == 1) {
p._should_keep_column_flags[_build_col_ids[0]] = true;
}
if (_shared_state->build_block) {
// release the memory of unused column in probe stage
_shared_state->build_block->clear_column_mem_not_keep(p._should_keep_column_flags,
p._use_shared_hash_table);
}
if (p._use_shared_hash_table) {
std::unique_lock lock(p._mutex);
p._signaled = true;
for (auto& dep : _shared_state->sink_deps) {
dep->set_ready();
}
for (auto& dep : p._finish_dependencies) {
dep->set_ready();
}
}
}};
```
```
*** Aborted at 1747343165 (unix time) try "date -d @1747343165" if you are
using GNU date ***
*** Current BE git commitID: e7a3e78b97 ***
*** SIGSEGV address not mapped to object (@0x1) received by PID 7474 (TID
9641 OR 0x7f3f8c0e5640) from PID 1; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:421
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
3# 0x00007F4368F76520 in /lib/x86_64-linux-gnu/libc.so.6
4# doris::Status doris::pipeline::ProcessHashTableProbe<7>::finish_probing
> > >(doris::vectorized::MethodKeysFixed > >&,
doris::vectorized::MutableBlock&, doris::vectorized::Block*, bool*, bool) at
/root/doris/be/src/pipeline/exec/join/process_hash_table_probe_impl.h:738
5# std::__detail::__variant::__gen_vtable_impl
(*)(doris::pipeline::HashJoinProbeOperatorX::pull(doris::RuntimeState*,
doris::vectorized::Block*, bool*) const::$_1&&, std::variant > >,
doris::vectorized::MethodOneNumber > >, doris::vectorized::MethodOneNumber > >,
doris::vectorized::MethodOneNumber > >, doris::vectorized::MethodOneNumber > >,
doris::vectorized::MethodOneNumber, doris::JoinHashTable, HashCRC32 > > >,
doris::vectorized::MethodOneNumber, doris::JoinHashTable, HashCRC32 [...]
6# doris::pipeline::HashJoinProbeOperatorX::pull(doris::RuntimeState*,
doris::vectorized::Block*, bool*) const at
/root/doris/be/src/pipeline/exec/hashjoin_probe_operator.cpp:281
7# doris::pipeline::StatefulOperatorX::get_block(doris::RuntimeState*,
doris::vectorized::Block*, bool*) at
/root/doris/be/src/pipeline/exec/operator.cpp:670
8#
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*,
doris::vectorized::Block*, bool*) at
/root/doris/be/src/pipeline/exec/operator.cpp:381
9# doris::pipeline::PipelineTask::execute(bool*) in
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
10# doris::pipeline::TaskScheduler::_do_work(int) at
/root/doris/be/src/pipeline/task_scheduler.cpp:144
11# doris::ThreadPool::dispatch_thread() at
/root/doris/be/src/util/threadpool.cpp:622
12# doris::Thread::supervise_thread(void*) at
/root/doris/be/src/util/thread.cpp:469
13# start_thread at ./nptl/pthread_create.c:442
14# 0x00007F436905A850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
```
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../exec/join/process_hash_table_probe_impl.h | 20 ++--
.../join/mark_join/right_semi_mark_join.out | Bin 0 -> 568 bytes
.../join/mark_join/right_semi_mark_join.groovy | 104 +++++++++++++++++++++
3 files changed, 115 insertions(+), 9 deletions(-)
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 62fa5505d81..29c24205972 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -285,6 +285,14 @@ Status
ProcessHashTableProbe<JoinOpType>::process(HashTableType& hash_table_ctx,
JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) &&
hash_table_ctx.hash_table
->empty_build_side(); // empty build side will return
false to instead null
+
+ if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) {
+ if (mark_join_flags.empty()) {
+ mark_join_flags.resize(hash_table_ctx.hash_table->size(), 0);
+ }
+ }
+
return do_mark_join_conjuncts(output_block, ignore_null_map ? nullptr
: null_map);
} else if (_have_other_join_conjunct) {
return do_other_join_conjuncts(output_block,
hash_table_ctx.hash_table->get_visited());
@@ -491,12 +499,6 @@ Status
ProcessHashTableProbe<JoinOpType>::do_mark_join_conjuncts(vectorized::Blo
}
}
- if constexpr (is_right_half_join) {
- if (mark_join_flags.empty() && _build_block != nullptr) {
- mark_join_flags.resize(_build_block->rows(), 0);
- }
- }
-
auto filter_column = vectorized::ColumnUInt8::create(row_count, 0);
auto* __restrict filter_map = filter_column->get_data().data();
for (size_t i = 0; i != row_count; ++i) {
@@ -547,7 +549,7 @@ Status
ProcessHashTableProbe<JoinOpType>::do_mark_join_conjuncts(vectorized::Blo
}
}
// For right semi/anti join, no rows will be output in probe phase.
- output_block->clear_column_data();
+ output_block->clear();
return Status::OK();
} else {
if constexpr (is_anti_join) {
@@ -721,8 +723,8 @@ Status
ProcessHashTableProbe<JoinOpType>::finish_probing(HashTableType& hash_tab
if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
if (is_mark_join) {
- if (mark_join_flags.empty() && _build_block != nullptr) {
- mark_join_flags.resize(_build_block->rows(), 0);
+ if (mark_join_flags.empty()) {
+ mark_join_flags.resize(hash_table_ctx.hash_table->size(),
0);
}
// mark column is nullable
diff --git
a/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out
b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out
new file mode 100644
index 00000000000..e00e19be11e
Binary files /dev/null and
b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out differ
diff --git
a/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy
b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy
new file mode 100644
index 00000000000..3557475cdd2
--- /dev/null
+++ b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("right_semi_mark_join") {
+ sql "drop table if exists tbl1;"
+ sql "drop table if exists tbl2;"
+ sql "drop table if exists tbl3;"
+
+ sql """
+ create table tbl1 (pk int, col1 bigint, col2 bigint) engine = olap
DUPLICATE KEY(pk) distributed by hash(pk) buckets 10
properties("replication_num" = "1");
+ """
+
+ sql """
+ insert into
+ tbl1(pk, col1, col2)
+ values
+ (0, null, 18332), (1, 788547, null), (2, 4644959, -56), (3,
8364628, 72), (4, null, -5581),
+ (5, 2344024, -62), (6, -2689177, 22979), (7, 1320, -41), (8,
null, -54), (9, 12, -6236),
+ (10, -8321648, null), (11, 153691, null), (12, -8056, null), (13,
-12, -2343514), (14, -35, -3361960);
+ """
+
+ sql """
+ create table tbl2 (
+ pk int, col1 bigint, col2 bigint
+ ) engine = olap
+ distributed by hash(pk) buckets 4
+ properties("replication_num" = "1");
+ """
+
+ sql """
+ insert into
+ tbl2(pk, col1, col2)
+ values
+ (0, 108, 31161), (1, 1479175, 6764263), (2, 110, 25), (3, 110,
-18656), (4, null, -51),
+ (5, 21, 27), (6, -6950217, 1585978), (7, null, null), (8, null,
3453467), (9, null, -6701140);
+ """
+
+ sql """
+ create table tbl3 (
+ pk int, col1 bigint, col2 bigint, col3 bigint
+ ) engine = olap
+ DUPLICATE KEY(pk) distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ """
+
+ sql """
+ insert into
+ tbl3(pk, col1, col2)
+ values
+ (0, 55, -58), (1, 49, 29792), (2, 95, 32361), (3, 31243, -27428),
(4, -27400, null),
+ (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243,
-21951), (9, 13186, 24466),
+ (10, null, -8), (11, null, null), (12, -18, 32361), (13, null,
-18), (14, 21681, 14079),
+ (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18,
null, 20682), (19, 31243, -98),
+ (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76,
null, 20822), (77, 31243, -27428),
+ (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81,
31243, 4077), (82, null, 114),
+ (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984,
32361), (87, 26607, -27428),
+ (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243,
-21951), (9, 13186, 24466),
+ (10, null, -8), (11, null, null), (12, -18, 32361), (13, null,
-18), (14, 21681, 14079),
+ (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18,
null, 20682), (19, 31243, -98),
+ (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76,
null, 20822), (77, 31243, -27428),
+ (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81,
31243, 4077), (82, null, 114),
+ (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984,
32361), (87, 26607, -27428),
+ (10, null, -8), (11, null, null), (12, -18, 32361), (13, null,
-18), (14, 21681, 14079),
+ (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18,
null, 20682), (19, 31243, -98),
+ (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76,
null, 20822), (77, 31243, -27428),
+ (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81,
31243, 4077), (82, null, 114),
+ (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984,
32361), (87, 26607, -27428);
+ """
+
+ qt_test """
+ SELECT
+ T1.pk AS C1,
+ T1.col2 AS C2
+ FROM
+ tbl1 AS T1 FULL
+ OUTER JOIN tbl2 AS T2 ON T1.col1 <= T2.col2
+ OR T2.col1 IN (
+ SELECT
+ T3.col2
+ FROM
+ tbl3 AS T3
+ WHERE
+ T2.col2 = T3.col1
+ )
+ ORDER BY
+ C1,
+ C2 DESC;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]