This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 982e733ac8c [fix](nereids)keep at least one hash output slot when 
prune slots in hash join node (#47318)
982e733ac8c is described below

commit 982e733ac8c028c64558549e77b064e92837be80
Author: starocean999 <li...@selectdb.com>
AuthorDate: Mon Feb 10 14:50:47 2025 +0800

    [fix](nereids)keep at least one hash output slot when prune slots in hash 
join node (#47318)
    
    consider sql bellow:
    ```
    SELECT
        9
    FROM
        table_20_undef_partitions2_keys3_properties4_distributed_by5 AS 
tbl_alias2
    WHERE
        (
            NOT (
                tbl_alias2.col_int_undef_signed NOT IN (
                    SELECT
                        8
                    FROM
                        
table_50_undef_partitions2_keys3_properties4_distributed_by53
                )
                AND  '2023-12-12' IN ('2023-12-19')
            )
        );
    ```
    no columns from hash join node is needed, so the hash output slots are
    empty. But BE would keep all columns from both table when hash output
    slots are empty. So FE will keep at least one column in hash output
    slots to let BE happy
---
 .../glue/translator/PhysicalPlanTranslator.java    |  12 ++
 .../test_column_prune_in_hash_join.out             | Bin 0 -> 255 bytes
 .../test_column_prune_in_hash_join.groovy          | 131 +++++++++++++++++++++
 3 files changed, 143 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 468f9c50496..91cef4aae53 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1963,6 +1963,7 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
             joinNode.setProjectList(projectionExprs);
             // prune the hashOutputSlotIds
             if (joinNode instanceof HashJoinNode) {
+                Set<SlotId> oldHashOutputSlotIds = 
Sets.newHashSet(((HashJoinNode) joinNode).getHashOutputSlotIds());
                 ((HashJoinNode) joinNode).getHashOutputSlotIds().clear();
                 Set<ExprId> requiredExprIds = Sets.newHashSet();
                 Set<SlotId> requiredOtherConjunctsSlotIdSet = 
Sets.newHashSet();
@@ -1986,6 +1987,17 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                         ((HashJoinNode) 
joinNode).addSlotIdToHashOutputSlotIds(slotId);
                     }
                 }
+                if (((HashJoinNode) 
joinNode).getHashOutputSlotIds().isEmpty()) {
+                    // In FE, if all columns are pruned, hash output slots are 
empty.
+                    // On the contrary, BE will keep all columns if hash 
output slots are empty.
+                    // Currently BE will keep this behavior in order to be 
compatible with older planner.
+                    // So we have to workaround this in FE by keeping at least 
one slot in oldHashOutputSlotIds.
+                    // TODO: Remove this code when old planner is deleted and 
BE changes to be consistent with FE.
+                    for (SlotId slotId : oldHashOutputSlotIds) {
+                        ((HashJoinNode) 
joinNode).addSlotIdToHashOutputSlotIds(slotId);
+                        break;
+                    }
+                }
             }
             return inputFragment;
         }
diff --git 
a/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out 
b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out
new file mode 100644
index 00000000000..9432b56120f
Binary files /dev/null and 
b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out differ
diff --git 
a/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy 
b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy
new file mode 100644
index 00000000000..4fcfc8eff4b
--- /dev/null
+++ 
b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_prune_in_hash_join") {
+    sql """
+        drop table if exists 
table_20_undef_partitions2_keys3_properties4_distributed_by5;
+    """
+    sql """
+        drop table if exists 
table_50_undef_partitions2_keys3_properties4_distributed_by53;
+    """
+    
+    sql """
+        create table 
table_20_undef_partitions2_keys3_properties4_distributed_by5 (
+        col_int_undef_signed int  null  ,
+        col_int_undef_signed_not_null int  not null  ,
+        col_date_undef_signed date  null  ,
+        col_date_undef_signed_not_null date  not null  ,
+        col_varchar_5__undef_signed varchar(5)  null  ,
+        col_varchar_5__undef_signed_not_null varchar(5)  not null  ,
+        pk int
+        ) engine=olap
+        DUPLICATE KEY(col_int_undef_signed)
+        PARTITION BY             RANGE(col_int_undef_signed) (
+                        PARTITION p0 VALUES LESS THAN ('4'),
+                        PARTITION p1 VALUES LESS THAN ('6'),
+                        PARTITION p2 VALUES LESS THAN ('7'),
+                        PARTITION p3 VALUES LESS THAN ('8'),
+                        PARTITION p4 VALUES LESS THAN ('10'),
+                        PARTITION p5 VALUES LESS THAN ('83647'),
+                        PARTITION p100 VALUES LESS THAN ('2147483647')
+                    )
+                
+        distributed by hash(pk) buckets 10
+        properties("replication_num" = "1");
+    """
+
+    sql """
+        create table 
table_50_undef_partitions2_keys3_properties4_distributed_by53 (
+        col_date_undef_signed date  null  ,
+        col_int_undef_signed int  null  ,
+        col_int_undef_signed_not_null int  not null  ,
+        col_date_undef_signed_not_null date  not null  ,
+        col_varchar_5__undef_signed varchar(5)  null  ,
+        col_varchar_5__undef_signed_not_null varchar(5)  not null  ,
+        pk int
+        ) engine=olap
+        DUPLICATE KEY(col_date_undef_signed, col_int_undef_signed)
+        PARTITION BY             RANGE(col_date_undef_signed) (
+                        PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+                        PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+                        PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+                        PARTITION p3 VALUES LESS THAN ('2023-12-17'),
+                        PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+                        PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+                        PARTITION p100 VALUES LESS THAN ('9999-12-31')
+                    )
+                
+        distributed by hash(pk) buckets 10
+        properties("replication_num" = "1");
+    """
+
+    sql """insert into 
table_20_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null)
 values 
(0,6,7625942,null,'2023-12-13','e','c'),(1,4,9,'2023-12-12','2023-12-12','m','w'),(2,null,-17559,'2023-12-10','2023-12-11','p','t'),(3,-15839,8,'2023-12-12','2023-12-12','h','d'),(4,null,5578188,'2023-12-17','2023-12-14','c','i'
 [...]
+    sql """insert into 
table_50_undef_partitions2_keys3_properties4_distributed_by53(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null)
 values 
(0,2,4,'2023-12-17','2023-12-14','q','t'),(1,3,4,'2023-12-16','2023-12-14','s','j'),(2,-1553,9,'2023-12-17','2004-01-22','w','x'),(3,7,-15007,'2023-12-09','2023-12-14','i','y'),(4,10788,16430,'2023-12-11','2006-08-11','g','f'),
 [...]
+
+    qt_sql_prune_all_mark_join """
+        SELECT
+            9
+        FROM
+            table_20_undef_partitions2_keys3_properties4_distributed_by5 AS 
tbl_alias2
+        WHERE
+            (
+                NOT (
+                    tbl_alias2.col_int_undef_signed NOT IN (
+                        SELECT
+                            8
+                        FROM
+                            
table_50_undef_partitions2_keys3_properties4_distributed_by53
+                    )
+                    AND  '2023-12-12' IN ('2023-12-19')
+                )
+            );
+    """
+    
+    qt_sql_prune_other_conjuncts """
+            SELECT
+                9
+            FROM
+                table_20_undef_partitions2_keys3_properties4_distributed_by5 
AS tbl_alias2
+            WHERE
+                (
+                    NOT (
+                        tbl_alias2.col_int_undef_signed NOT IN (
+                            SELECT
+                                8
+                            FROM
+                                
table_50_undef_partitions2_keys3_properties4_distributed_by53
+                        )
+                    )
+                );
+    """
+
+    qt_sql_prune_all """
+            SELECT
+                9
+            FROM
+                table_20_undef_partitions2_keys3_properties4_distributed_by5 
AS tbl_alias2
+            WHERE
+                (
+                    tbl_alias2.col_int_undef_signed NOT IN (
+                        SELECT
+                            8
+                        FROM
+                            
table_50_undef_partitions2_keys3_properties4_distributed_by53
+                    )
+                );
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to