This is an automated email from the ASF dual-hosted git repository. starocean999 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 982e733ac8c [fix](nereids)keep at least one hash output slot when prune slots in hash join node (#47318) 982e733ac8c is described below commit 982e733ac8c028c64558549e77b064e92837be80 Author: starocean999 <li...@selectdb.com> AuthorDate: Mon Feb 10 14:50:47 2025 +0800 [fix](nereids)keep at least one hash output slot when prune slots in hash join node (#47318) consider sql bellow: ``` SELECT 9 FROM table_20_undef_partitions2_keys3_properties4_distributed_by5 AS tbl_alias2 WHERE ( NOT ( tbl_alias2.col_int_undef_signed NOT IN ( SELECT 8 FROM table_50_undef_partitions2_keys3_properties4_distributed_by53 ) AND '2023-12-12' IN ('2023-12-19') ) ); ``` no columns from hash join node is needed, so the hash output slots are empty. But BE would keep all columns from both table when hash output slots are empty. So FE will keep at least one column in hash output slots to let BE happy --- .../glue/translator/PhysicalPlanTranslator.java | 12 ++ .../test_column_prune_in_hash_join.out | Bin 0 -> 255 bytes .../test_column_prune_in_hash_join.groovy | 131 +++++++++++++++++++++ 3 files changed, 143 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 468f9c50496..91cef4aae53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1963,6 +1963,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla joinNode.setProjectList(projectionExprs); // prune the hashOutputSlotIds if (joinNode instanceof HashJoinNode) { + Set<SlotId> oldHashOutputSlotIds = Sets.newHashSet(((HashJoinNode) joinNode).getHashOutputSlotIds()); ((HashJoinNode) joinNode).getHashOutputSlotIds().clear(); Set<ExprId> requiredExprIds = Sets.newHashSet(); Set<SlotId> requiredOtherConjunctsSlotIdSet = Sets.newHashSet(); @@ -1986,6 +1987,17 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla ((HashJoinNode) joinNode).addSlotIdToHashOutputSlotIds(slotId); } } + if (((HashJoinNode) joinNode).getHashOutputSlotIds().isEmpty()) { + // In FE, if all columns are pruned, hash output slots are empty. + // On the contrary, BE will keep all columns if hash output slots are empty. + // Currently BE will keep this behavior in order to be compatible with older planner. + // So we have to workaround this in FE by keeping at least one slot in oldHashOutputSlotIds. + // TODO: Remove this code when old planner is deleted and BE changes to be consistent with FE. + for (SlotId slotId : oldHashOutputSlotIds) { + ((HashJoinNode) joinNode).addSlotIdToHashOutputSlotIds(slotId); + break; + } + } } return inputFragment; } diff --git a/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out new file mode 100644 index 00000000000..9432b56120f Binary files /dev/null and b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out differ diff --git a/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy new file mode 100644 index 00000000000..4fcfc8eff4b --- /dev/null +++ b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_column_prune_in_hash_join") { + sql """ + drop table if exists table_20_undef_partitions2_keys3_properties4_distributed_by5; + """ + sql """ + drop table if exists table_50_undef_partitions2_keys3_properties4_distributed_by53; + """ + + sql """ + create table table_20_undef_partitions2_keys3_properties4_distributed_by5 ( + col_int_undef_signed int null , + col_int_undef_signed_not_null int not null , + col_date_undef_signed date null , + col_date_undef_signed_not_null date not null , + col_varchar_5__undef_signed varchar(5) null , + col_varchar_5__undef_signed_not_null varchar(5) not null , + pk int + ) engine=olap + DUPLICATE KEY(col_int_undef_signed) + PARTITION BY RANGE(col_int_undef_signed) ( + PARTITION p0 VALUES LESS THAN ('4'), + PARTITION p1 VALUES LESS THAN ('6'), + PARTITION p2 VALUES LESS THAN ('7'), + PARTITION p3 VALUES LESS THAN ('8'), + PARTITION p4 VALUES LESS THAN ('10'), + PARTITION p5 VALUES LESS THAN ('83647'), + PARTITION p100 VALUES LESS THAN ('2147483647') + ) + + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + create table table_50_undef_partitions2_keys3_properties4_distributed_by53 ( + col_date_undef_signed date null , + col_int_undef_signed int null , + col_int_undef_signed_not_null int not null , + col_date_undef_signed_not_null date not null , + col_varchar_5__undef_signed varchar(5) null , + col_varchar_5__undef_signed_not_null varchar(5) not null , + pk int + ) engine=olap + DUPLICATE KEY(col_date_undef_signed, col_int_undef_signed) + PARTITION BY RANGE(col_date_undef_signed) ( + PARTITION p0 VALUES LESS THAN ('2023-12-11'), + PARTITION p1 VALUES LESS THAN ('2023-12-15'), + PARTITION p2 VALUES LESS THAN ('2023-12-16'), + PARTITION p3 VALUES LESS THAN ('2023-12-17'), + PARTITION p4 VALUES LESS THAN ('2024-01-18'), + PARTITION p5 VALUES LESS THAN ('2026-02-18'), + PARTITION p100 VALUES LESS THAN ('9999-12-31') + ) + + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """insert into table_20_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null) values (0,6,7625942,null,'2023-12-13','e','c'),(1,4,9,'2023-12-12','2023-12-12','m','w'),(2,null,-17559,'2023-12-10','2023-12-11','p','t'),(3,-15839,8,'2023-12-12','2023-12-12','h','d'),(4,null,5578188,'2023-12-17','2023-12-14','c','i' [...] + sql """insert into table_50_undef_partitions2_keys3_properties4_distributed_by53(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null) values (0,2,4,'2023-12-17','2023-12-14','q','t'),(1,3,4,'2023-12-16','2023-12-14','s','j'),(2,-1553,9,'2023-12-17','2004-01-22','w','x'),(3,7,-15007,'2023-12-09','2023-12-14','i','y'),(4,10788,16430,'2023-12-11','2006-08-11','g','f'), [...] + + qt_sql_prune_all_mark_join """ + SELECT + 9 + FROM + table_20_undef_partitions2_keys3_properties4_distributed_by5 AS tbl_alias2 + WHERE + ( + NOT ( + tbl_alias2.col_int_undef_signed NOT IN ( + SELECT + 8 + FROM + table_50_undef_partitions2_keys3_properties4_distributed_by53 + ) + AND '2023-12-12' IN ('2023-12-19') + ) + ); + """ + + qt_sql_prune_other_conjuncts """ + SELECT + 9 + FROM + table_20_undef_partitions2_keys3_properties4_distributed_by5 AS tbl_alias2 + WHERE + ( + NOT ( + tbl_alias2.col_int_undef_signed NOT IN ( + SELECT + 8 + FROM + table_50_undef_partitions2_keys3_properties4_distributed_by53 + ) + ) + ); + """ + + qt_sql_prune_all """ + SELECT + 9 + FROM + table_20_undef_partitions2_keys3_properties4_distributed_by5 AS tbl_alias2 + WHERE + ( + tbl_alias2.col_int_undef_signed NOT IN ( + SELECT + 8 + FROM + table_50_undef_partitions2_keys3_properties4_distributed_by53 + ) + ); + """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org