This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 81d77fb05c9 [fix](join) Should not use the build block's size to 
resize mark_join_flags (#50993)
81d77fb05c9 is described below

commit 81d77fb05c976327d5df7893277eae3163846062
Author: Jerry Hu <[email protected]>
AuthorDate: Tue May 20 17:35:27 2025 +0800

    [fix](join) Should not use the build block's size to resize mark_join_flags 
(#50993)
    
    ### What problem does this PR solve?
    
    Introduced by #51050
    
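    The build block's columns may already have been released by
    `clear_column_mem_not_keep` during the build phase, when the sink operator
    is closed, so the block's row count must not be used to size
    `mark_join_flags`.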
    
    ```cpp
    Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status 
exec_status) {
        if (_closed) {
            return Status::OK();
        }
        auto& p = _parent->cast<HashJoinBuildSinkOperatorX>();
        Defer defer {[&]() {
            if (!_should_build_hash_table) {
                return;
            }
            // The build side hash key column maybe no need output, but we need 
to keep the column in block
            // because it is used to compare with probe side hash key column
    
            if (p._should_keep_hash_key_column && _build_col_ids.size() == 1) {
                p._should_keep_column_flags[_build_col_ids[0]] = true;
            }
    
            if (_shared_state->build_block) {
                // release the memory of unused column in probe stage
                
_shared_state->build_block->clear_column_mem_not_keep(p._should_keep_column_flags,
                                                                      
p._use_shared_hash_table);
            }
    
            if (p._use_shared_hash_table) {
                std::unique_lock lock(p._mutex);
                p._signaled = true;
                for (auto& dep : _shared_state->sink_deps) {
                    dep->set_ready();
                }
                for (auto& dep : p._finish_dependencies) {
                    dep->set_ready();
                }
            }
        }};
    ```
    
    ```
    *** Aborted at 1747343165 (unix time) try "date -d @1747343165" if you are 
using GNU date ***
    *** Current BE git commitID: e7a3e78b97 ***
    *** SIGSEGV address not mapped to object (@0x1) received by PID 7474 (TID 
9641 OR 0x7f3f8c0e5640) from PID 1; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:421
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     3# 0x00007F4368F76520 in /lib/x86_64-linux-gnu/libc.so.6
     4# doris::Status doris::pipeline::ProcessHashTableProbe<7>::finish_probing 
> > >(doris::vectorized::MethodKeysFixed > >&, 
doris::vectorized::MutableBlock&, doris::vectorized::Block*, bool*, bool) at 
/root/doris/be/src/pipeline/exec/join/process_hash_table_probe_impl.h:738
     5# std::__detail::__variant::__gen_vtable_impl 
(*)(doris::pipeline::HashJoinProbeOperatorX::pull(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) const::$_1&&, std::variant > >, 
doris::vectorized::MethodOneNumber > >, doris::vectorized::MethodOneNumber > >, 
doris::vectorized::MethodOneNumber > >, doris::vectorized::MethodOneNumber > >, 
doris::vectorized::MethodOneNumber, doris::JoinHashTable, HashCRC32 > > >, 
doris::vectorized::MethodOneNumber, doris::JoinHashTable, HashCRC32  [...]
     6# doris::pipeline::HashJoinProbeOperatorX::pull(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) const at 
/root/doris/be/src/pipeline/exec/hashjoin_probe_operator.cpp:281
     7# doris::pipeline::StatefulOperatorX::get_block(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) at 
/root/doris/be/src/pipeline/exec/operator.cpp:670
     8# 
doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) at 
/root/doris/be/src/pipeline/exec/operator.cpp:381
     9# doris::pipeline::PipelineTask::execute(bool*) in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
    10# doris::pipeline::TaskScheduler::_do_work(int) at 
/root/doris/be/src/pipeline/task_scheduler.cpp:144
    11# doris::ThreadPool::dispatch_thread() at 
/root/doris/be/src/util/threadpool.cpp:622
    12# doris::Thread::supervise_thread(void*) at 
/root/doris/be/src/util/thread.cpp:469
    13# start_thread at ./nptl/pthread_create.c:442
    14# 0x00007F436905A850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    ```
    
    Related PR: #xxx
    
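    Problem Summary: `mark_join_flags` was resized using `_build_block->rows()`,
    which is unreliable once the build block's columns have been cleared. It is
    now resized using `hash_table_ctx.hash_table->size()` instead.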
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 .../exec/join/process_hash_table_probe_impl.h      |  20 ++--
 .../join/mark_join/right_semi_mark_join.out        | Bin 0 -> 568 bytes
 .../join/mark_join/right_semi_mark_join.groovy     | 104 +++++++++++++++++++++
 3 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h 
b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 62fa5505d81..29c24205972 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -285,6 +285,14 @@ Status 
ProcessHashTableProbe<JoinOpType>::process(HashTableType& hash_table_ctx,
                  JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) &&
                 hash_table_ctx.hash_table
                         ->empty_build_side(); // empty build side will return 
false to instead null
+
+        if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN ||
+                      JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) {
+            if (mark_join_flags.empty()) {
+                mark_join_flags.resize(hash_table_ctx.hash_table->size(), 0);
+            }
+        }
+
         return do_mark_join_conjuncts(output_block, ignore_null_map ? nullptr 
: null_map);
     } else if (_have_other_join_conjunct) {
         return do_other_join_conjuncts(output_block, 
hash_table_ctx.hash_table->get_visited());
@@ -491,12 +499,6 @@ Status 
ProcessHashTableProbe<JoinOpType>::do_mark_join_conjuncts(vectorized::Blo
         }
     }
 
-    if constexpr (is_right_half_join) {
-        if (mark_join_flags.empty() && _build_block != nullptr) {
-            mark_join_flags.resize(_build_block->rows(), 0);
-        }
-    }
-
     auto filter_column = vectorized::ColumnUInt8::create(row_count, 0);
     auto* __restrict filter_map = filter_column->get_data().data();
     for (size_t i = 0; i != row_count; ++i) {
@@ -547,7 +549,7 @@ Status 
ProcessHashTableProbe<JoinOpType>::do_mark_join_conjuncts(vectorized::Blo
             }
         }
         // For right semi/anti join, no rows will be output in probe phase.
-        output_block->clear_column_data();
+        output_block->clear();
         return Status::OK();
     } else {
         if constexpr (is_anti_join) {
@@ -721,8 +723,8 @@ Status 
ProcessHashTableProbe<JoinOpType>::finish_probing(HashTableType& hash_tab
         if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN ||
                       JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) {
             if (is_mark_join) {
-                if (mark_join_flags.empty() && _build_block != nullptr) {
-                    mark_join_flags.resize(_build_block->rows(), 0);
+                if (mark_join_flags.empty()) {
+                    mark_join_flags.resize(hash_table_ctx.hash_table->size(), 
0);
                 }
 
                 // mark column is nullable
diff --git 
a/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out 
b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out
new file mode 100644
index 00000000000..e00e19be11e
Binary files /dev/null and 
b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out differ
diff --git 
a/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy 
b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy
new file mode 100644
index 00000000000..3557475cdd2
--- /dev/null
+++ b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("right_semi_mark_join") {
+    sql "drop table if exists tbl1;"
+    sql "drop table if exists tbl2;"
+    sql "drop table if exists tbl3;"
+
+    sql """
+        create table tbl1 (pk int, col1 bigint, col2 bigint) engine = olap 
DUPLICATE KEY(pk) distributed by hash(pk) buckets 10 
properties("replication_num" = "1");
+    """
+
+    sql """
+        insert into
+            tbl1(pk, col1, col2)
+        values
+            (0, null, 18332),  (1, 788547, null), (2, 4644959, -56),  (3, 
8364628, 72),  (4, null, -5581),
+            (5, 2344024, -62), (6, -2689177, 22979),  (7, 1320, -41), (8, 
null, -54),  (9, 12, -6236),
+            (10, -8321648, null), (11, 153691, null), (12, -8056, null), (13, 
-12, -2343514), (14, -35, -3361960);
+    """
+
+    sql """
+        create table tbl2 (
+            pk int, col1 bigint, col2 bigint
+        ) engine = olap 
+        distributed by hash(pk) buckets 4
+        properties("replication_num" = "1");
+    """
+
+    sql """
+        insert into
+            tbl2(pk, col1, col2)
+        values
+            (0, 108, 31161), (1, 1479175, 6764263), (2, 110, 25), (3, 110, 
-18656), (4, null, -51),
+            (5, 21, 27), (6, -6950217, 1585978), (7, null, null), (8, null, 
3453467),  (9, null, -6701140);
+    """
+    
+    sql """
+        create table tbl3 (
+            pk int, col1 bigint, col2 bigint, col3 bigint
+        ) engine = olap 
+        DUPLICATE KEY(pk) distributed by hash(pk) buckets 10
+        properties("replication_num" = "1");
+    """
+
+    sql """
+        insert into
+            tbl3(pk, col1, col2)
+        values
+            (0, 55, -58), (1, 49, 29792), (2, 95, 32361),  (3, 31243, -27428), 
(4, -27400, null),
+            (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243, 
-21951), (9, 13186, 24466),
+            (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, 
-18), (14, 21681, 14079),
+            (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, 
null, 20682), (19, 31243, -98),
+            (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, 
null, 20822), (77, 31243, -27428),
+            (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 
31243, 4077), (82, null, 114),
+            (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 
32361), (87, 26607, -27428),
+            (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243, 
-21951), (9, 13186, 24466),
+            (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, 
-18), (14, 21681, 14079),
+            (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, 
null, 20682), (19, 31243, -98),
+            (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, 
null, 20822), (77, 31243, -27428),
+            (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 
31243, 4077), (82, null, 114),
+            (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 
32361), (87, 26607, -27428),
+            (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, 
-18), (14, 21681, 14079),
+            (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, 
null, 20682), (19, 31243, -98),
+            (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, 
null, 20822), (77, 31243, -27428),
+            (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 
31243, 4077), (82, null, 114),
+            (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 
32361), (87, 26607, -27428);
+    """
+
+    qt_test """
+        SELECT
+            T1.pk AS C1,
+            T1.col2 AS C2
+        FROM
+            tbl1 AS T1 FULL
+            OUTER JOIN tbl2 AS T2 ON T1.col1 <= T2.col2
+            OR T2.col1 IN (
+                SELECT
+                    T3.col2
+                FROM
+                    tbl3 AS T3
+                WHERE
+                    T2.col2 = T3.col1
+            )
+        ORDER BY
+            C1,
+            C2 DESC;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to