This is an automated email from the ASF dual-hosted git repository. jakevin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new ac3290021d [fix](Nereids): MergeSetOperations can merge SetOperation ALL. (#20902) ac3290021d is described below commit ac3290021dbdc5d55229c0a6218f7031c048bb68 Author: jakevin <jakevin...@gmail.com> AuthorDate: Sun Jun 18 17:49:03 2023 +0800 [fix](Nereids): MergeSetOperations can merge SetOperation ALL. (#20902) --- .../doris/nereids/jobs/executor/Rewriter.java | 33 +++++---- .../nereids/rules/rewrite/BuildAggForUnion.java | 7 +- .../nereids/rules/rewrite/MergeSetOperations.java | 6 +- .../nereids_tpcds_shape_sf100_p0/shape/query49.out | 84 +++++++++++----------- .../nereids_tpcds_shape_sf100_p0/shape/query75.out | 68 +++++++++--------- 5 files changed, 100 insertions(+), 98 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 9697a8d007..ec8bfb6531 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -218,21 +218,28 @@ public class Rewriter extends AbstractBatchJobExecutor { // this rule should invoke after ColumnPruning custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new), - // we need to execute this rule at the end of rewrite - // to avoid two consecutive same project appear when we do optimization. topic("Others optimization", - bottomUp(ImmutableList.<RuleFactory>builder().addAll(ImmutableList.of( - new EliminateNotNull(), - new EliminateLimit(), - new EliminateFilter(), - new EliminateAggregate(), - new MergeSetOperations(), - new PushdownLimit(), - new BuildAggForUnion() - // after eliminate filter, the project maybe can push down again, - // so we add push down rules - )).addAll(RuleSet.PUSH_DOWN_FILTERS).build()) + bottomUp(ImmutableList.<RuleFactory>builder() + .addAll(ImmutableList.of( + new EliminateNotNull(), + new EliminateLimit(), + new EliminateFilter(), + new EliminateAggregate(), + new PushdownLimit() + )) + // after eliminate some plan, we maybe can push down some plan again, so add push down rules + .add(new PushdownLimit()) + .addAll(RuleSet.PUSH_DOWN_FILTERS) + .build() + ) ), + + topic("Intersection optimization", + // Do MergeSetOperation first because we hope to match pattern of Distinct SetOperator. + bottomUp(new MergeSetOperations()), + bottomUp(new BuildAggForUnion()) + ), + topic("Window optimization", topDown( new PushdownLimit(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java index 3e13860fd5..d1f4ba2d04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java @@ -27,7 +27,12 @@ import com.google.common.collect.ImmutableList; import java.util.Optional; /** - * For distinct union, add agg node. + * Convert Union into Agg + UnionAll. + * <pre> + * Agg + * Union -> | + * UnionAll + * </pre> */ public class BuildAggForUnion extends OneRewriteRuleFactory { @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java index f0a72be315..9d17f00e88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java @@ -48,7 +48,6 @@ public class MergeSetOperations implements RewriteRuleFactory { @Override public List<Rule> buildRules() { return ImmutableList.of( - RuleType.MERGE_SET_OPERATION.build( logicalSetOperation(any(), any()).when(MergeSetOperations::canMerge).then(parentSetOperation -> { List<Plan> newChildren = parentSetOperation.children() .stream() @@ -61,8 +60,7 @@ public class MergeSetOperations implements RewriteRuleFactory { }).collect(ImmutableList.toImmutableList()); return parentSetOperation.withChildren(newChildren); - }) - ) + }).toRule(RuleType.MERGE_SET_OPERATION) ); } @@ -80,7 +78,7 @@ public class MergeSetOperations implements RewriteRuleFactory { } public static boolean isSameQualifierOrChildQualifierIsAll(LogicalSetOperation parentSetOperation, - LogicalSetOperation childSetOperation) { + LogicalSetOperation childSetOperation) { return parentSetOperation.getQualifier() == childSetOperation.getQualifier() || childSetOperation.getQualifier() == Qualifier.ALL; } diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out index ec39065e1f..557f4a5e33 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out @@ -7,64 +7,60 @@ PhysicalTopN --------PhysicalDistribute ----------hashAgg[LOCAL] ------------PhysicalUnion ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute -------------------hashAgg[LOCAL] ---------------------PhysicalUnion -----------------------PhysicalProject -------------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +--------------PhysicalProject +----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +------------------PhysicalWindow +--------------------PhysicalQuickSort +----------------------PhysicalDistribute +------------------------PhysicalQuickSort --------------------------PhysicalWindow ----------------------------PhysicalQuickSort ------------------------------PhysicalDistribute --------------------------------PhysicalQuickSort -----------------------------------PhysicalWindow -------------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] --------------------------------------PhysicalDistribute -----------------------------------------PhysicalQuickSort +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------hashAgg[GLOBAL] -----------------------------------------------PhysicalDistribute -------------------------------------------------hashAgg[LOCAL] +--------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk) +----------------------------------------------PhysicalProject +------------------------------------------------filter((wr.wr_return_amt > 10000.00)) +--------------------------------------------------PhysicalOlapScan[web_returns] +----------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk) +------------------------------------------------PhysicalProject +--------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00)) +----------------------------------------------------PhysicalOlapScan[web_sales] +------------------------------------------------PhysicalDistribute --------------------------------------------------PhysicalProject -----------------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk) -------------------------------------------------------PhysicalProject ---------------------------------------------------------filter((wr.wr_return_amt > 10000.00)) -----------------------------------------------------------PhysicalOlapScan[web_returns] -------------------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk) ---------------------------------------------------------PhysicalProject -----------------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00)) -------------------------------------------------------------PhysicalOlapScan[web_sales] ---------------------------------------------------------PhysicalDistribute -----------------------------------------------------------PhysicalProject -------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999)) ---------------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------PhysicalProject -------------------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] +--------------PhysicalProject +----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) +------------------PhysicalWindow +--------------------PhysicalQuickSort +----------------------PhysicalDistribute +------------------------PhysicalQuickSort --------------------------PhysicalWindow ----------------------------PhysicalQuickSort ------------------------------PhysicalDistribute --------------------------------PhysicalQuickSort -----------------------------------PhysicalWindow -------------------------------------PhysicalQuickSort +----------------------------------PhysicalProject +------------------------------------hashAgg[GLOBAL] --------------------------------------PhysicalDistribute -----------------------------------------PhysicalQuickSort +----------------------------------------hashAgg[LOCAL] ------------------------------------------PhysicalProject ---------------------------------------------hashAgg[GLOBAL] -----------------------------------------------PhysicalDistribute -------------------------------------------------hashAgg[LOCAL] +--------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk) +----------------------------------------------PhysicalProject +------------------------------------------------filter((cr.cr_return_amount > 10000.00)) +--------------------------------------------------PhysicalOlapScan[catalog_returns] +----------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk) +------------------------------------------------PhysicalProject +--------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00)) +----------------------------------------------------PhysicalOlapScan[catalog_sales] +------------------------------------------------PhysicalDistribute --------------------------------------------------PhysicalProject -----------------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk) -------------------------------------------------------PhysicalProject ---------------------------------------------------------filter((cr.cr_return_amount > 10000.00)) -----------------------------------------------------------PhysicalOlapScan[catalog_returns] -------------------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk) ---------------------------------------------------------PhysicalProject -----------------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00)) -------------------------------------------------------------PhysicalOlapScan[catalog_sales] ---------------------------------------------------------PhysicalDistribute -----------------------------------------------------------PhysicalProject -------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999)) ---------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999)) +------------------------------------------------------PhysicalOlapScan[date_dim] --------------PhysicalProject ----------------filter(((return_rank <= 10) OR (currency_rank <= 10))) ------------------PhysicalWindow diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out index 5964454aa5..0a86b61e33 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out @@ -7,44 +7,40 @@ CteAnchor[cteId= ( CTEId#3=] ) --------PhysicalDistribute ----------hashAgg[LOCAL] ------------PhysicalUnion ---------------hashAgg[GLOBAL] -----------------PhysicalDistribute -------------------hashAgg[LOCAL] ---------------------PhysicalUnion -----------------------PhysicalProject -------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number) ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[catalog_returns] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk) -------------------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk) ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home')) ---------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999))) -------------------------------------PhysicalOlapScan[date_dim] -----------------------PhysicalProject -------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number) +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number) +------------------PhysicalProject +--------------------PhysicalOlapScan[catalog_returns] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk) +----------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk) +------------------------PhysicalProject +--------------------------PhysicalOlapScan[catalog_sales] +------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------PhysicalOlapScan[store_returns] +----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home')) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999))) +----------------------------PhysicalOlapScan[date_dim] +--------------PhysicalProject +----------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number) +------------------PhysicalProject +--------------------PhysicalOlapScan[store_returns] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk) +----------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk) +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store_sales] +------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk) -------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk) ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home')) ---------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999))) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home')) +------------------------------PhysicalOlapScan[item] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999))) +----------------------------PhysicalOlapScan[date_dim] --------------PhysicalProject ----------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk = web_returns.wr_item_sk)(web_sales.ws_order_number = web_returns.wr_order_number) ------------------PhysicalProject --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org