This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 93af55ddd64 branch-4.1:[fix](fe) Fix redundant aggregation in
agg-union query plan #62231 (#63577)
93af55ddd64 is described below
commit 93af55ddd64ea20bf78e4b29d16691417b790441
Author: feiniaofeiafei <[email protected]>
AuthorDate: Mon May 25 18:16:35 2026 +0800
branch-4.1:[fix](fe) Fix redundant aggregation in agg-union query plan
#62231 (#63577)
picked from #62231
---
.../properties/ChildrenPropertiesRegulator.java | 22 +++-
.../rules/rewrite/AggregateUnionPlanTest.java | 117 +++++++++++++++++++++
.../tpcds_sf100/noStatsRfPrune/query75.out | 117 ++++++++++-----------
.../tpcds_sf100/no_stats_shape/query75.out | 117 ++++++++++-----------
.../shape_check/tpcds_sf10t_orc/shape/query75.out | 117 ++++++++++-----------
5 files changed, 310 insertions(+), 180 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
index 071c450a220..1b3bac4cd4d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.nereids.cost.Cost;
import org.apache.doris.nereids.cost.CostCalculator;
import org.apache.doris.nereids.jobs.JobContext;
+import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.DistributionSpecHash.ShuffleType;
import org.apache.doris.nereids.stats.StatsCalculator;
@@ -225,9 +226,24 @@ public class ChildrenPropertiesRegulator extends
PlanVisitor<List<List<PhysicalP
* no matter x.ndv is high or not, it is not worthwhile to shuffle A and B
by x
* and hence we forbid one phase agg */
private boolean banAggUnionAll(PhysicalHashAggregate<? extends Plan>
aggregate) {
- return aggregate.getAggMode() == AggMode.INPUT_TO_RESULT
- && children.get(0).getPlan() instanceof PhysicalUnion
- && !((PhysicalUnion) children.get(0).getPlan()).isDistinct();
+ if (aggregate.getAggMode() == AggMode.INPUT_TO_RESULT &&
children.get(0).getPlan() instanceof PhysicalUnion
+ && !((PhysicalUnion) children.get(0).getPlan()).isDistinct()) {
+ GroupExpression gExprUnion = children.get(0);
+ List<Group> groups = gExprUnion.children();
+ Pair<Cost, List<PhysicalProperties>> pair =
gExprUnion.getLowestCostTable().get(requiredProperties.get(0));
+ int i = 0;
+ // If none of the union inputs have PhysicalDistribute, allow
one-phase aggregation
+ for (Group group : groups) {
+ GroupExpression groupExpression =
group.getBestPlan(pair.second.get(i));
+ i++;
+ if (groupExpression != null && groupExpression.getPlan()
instanceof PhysicalDistribute) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+ return false;
}
@Override
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java
new file mode 100644
index 00000000000..96c8ca2d76e
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/AggregateUnionPlanTest.java
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.trees.plans.AggMode;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
+import org.apache.doris.nereids.util.MatchingUtils;
+import org.apache.doris.nereids.util.MemoPatternMatchSupported;
+import org.apache.doris.nereids.util.PlanChecker;
+import org.apache.doris.utframe.TestWithFeService;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for banAggUnionAll fix in ChildrenPropertiesRegulator.
+ */
+public class AggregateUnionPlanTest extends TestWithFeService implements
MemoPatternMatchSupported {
+
+ @Override
+ protected void runBeforeAll() throws Exception {
+ createDatabase("test_agg_union");
+
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");
+
+ // Tables with RANDOM distribution: no PhysicalDistribute needed in
union inputs
+ createTable("CREATE TABLE test_agg_union.t1_random ("
+ + " a INT NULL, b INT NULL"
+ + ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ + " DISTRIBUTED BY RANDOM BUCKETS AUTO"
+ + " PROPERTIES ('replication_allocation' =
'tag.location.default: 1');");
+ createTable("CREATE TABLE test_agg_union.t2_random ("
+ + " a INT NULL, b INT NULL"
+ + ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ + " DISTRIBUTED BY RANDOM BUCKETS AUTO"
+ + " PROPERTIES ('replication_allocation' =
'tag.location.default: 1');");
+
+ // Tables with HASH distribution: same key, no PhysicalDistribute
needed when
+ // group-by matches the distribution key
+ createTable("CREATE TABLE test_agg_union.t1_hash ("
+ + " a INT NULL, b INT NULL"
+ + ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ + " DISTRIBUTED BY HASH(a) BUCKETS 3"
+ + " PROPERTIES ('replication_allocation' =
'tag.location.default: 1');");
+ createTable("CREATE TABLE test_agg_union.t2_hash ("
+ + " a INT NULL, b INT NULL"
+ + ") ENGINE=OLAP DUPLICATE KEY(a, b)"
+ + " DISTRIBUTED BY HASH(a) BUCKETS 3"
+ + " PROPERTIES ('replication_allocation' =
'tag.location.default: 1');");
+ }
+
+ @Test
+ public void testAggUnionRandomDistributeUseOnePhase() {
+ // Reproduces the exact bug scenario described in the issue
+ String sql = "SELECT a, b FROM test_agg_union.t1_random GROUP BY a, b"
+ + " UNION"
+ + " SELECT a, b FROM test_agg_union.t2_random GROUP BY a, b";
+
+ PlanChecker.from(connectContext).checkExplain(sql, planner -> {
+ // The outer dedup agg should be one-phase (INPUT_TO_RESULT):
+ // - Before fix: BUFFER_TO_RESULT (two-phase, with redundant
LOCAL agg)
+ // - After fix: INPUT_TO_RESULT (one-phase, no redundant LOCAL
agg)
+ MatchingUtils.assertMatches(planner.getOptimizedPlan(),
+ physicalResultSink(
+ physicalHashAggregate(any())
+ .when(agg -> agg.getAggMode() ==
AggMode.INPUT_TO_RESULT)
+ .when(agg -> agg.child(0) instanceof
PhysicalUnion
+ ||
hasUnionDescendant(agg.child(0)))
+ ));
+ });
+ }
+
+ @Test
+ public void testOuterAggOverUnionAllRandomUsesTwoPhase() {
+ String sql = "SELECT a, b FROM"
+ + " (SELECT a, b FROM test_agg_union.t1_random"
+ + " UNION ALL"
+ + " SELECT a, b FROM test_agg_union.t2_random) t"
+ + " GROUP BY a, b";
+
+ PlanChecker.from(connectContext).checkExplain(sql, planner -> {
+ MatchingUtils.assertMatches(planner.getOptimizedPlan(),
+ physicalResultSink(
+ physicalHashAggregate(any())
+ .when(agg -> agg.getAggMode() ==
AggMode.BUFFER_TO_RESULT)
+ ));
+ });
+ }
+
+ /**
+ * Walk down through PhysicalProject nodes to check whether a PhysicalUnion sits
below.
+ * Used to handle optional project nodes that the optimizer may insert
between
+ * the dedup agg and the union.
+ */
+ private boolean
hasUnionDescendant(org.apache.doris.nereids.trees.plans.Plan plan) {
+ if (plan instanceof PhysicalUnion) {
+ return true;
+ }
+ if (plan instanceof
org.apache.doris.nereids.trees.plans.physical.PhysicalProject) {
+ return hasUnionDescendant(plan.child(0));
+ }
+ return false;
+ }
+}
diff --git
a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out
b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out
index 7a6c63c2385..7df97e1416a 100644
--- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out
+++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query75.out
@@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecHash]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalUnion
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_sales] apply RFs:
RF2 RF3
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
+------------PhysicalUnion
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
RF3
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
--PhysicalResultSink
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
diff --git
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out
index 7a6c63c2385..7df97e1416a 100644
--- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out
+++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query75.out
@@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecHash]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalUnion
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_sales] apply RFs:
RF2 RF3
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Home'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (1998, 1999))
-------------------------------PhysicalOlapScan[date_dim]
+------------PhysicalUnion
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
RF3
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN colocated]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Home'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (1998, 1999))
+----------------------------PhysicalOlapScan[date_dim]
--PhysicalResultSink
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out
index b45f54f94a7..8bc4e87f63e 100644
--- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out
+++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query75.out
@@ -6,65 +6,64 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecHash]
--------hashAgg[LOCAL]
----------hashAgg[GLOBAL]
-------------hashAgg[LOCAL]
---------------PhysicalUnion
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Sports'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (2000, 2001))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_sales] apply RFs:
RF2 RF3
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Sports'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (2000, 2001))
-------------------------------PhysicalOlapScan[date_dim]
-----------------hashAgg[GLOBAL]
-------------------PhysicalDistribute[DistributionSpecHash]
---------------------hashAgg[LOCAL]
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
-------------------------------PhysicalProject
---------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_returns]
-------------------------------PhysicalProject
---------------------------------filter((item.i_category = 'Sports'))
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalProject
-----------------------------filter(d_year IN (2000, 2001))
-------------------------------PhysicalOlapScan[date_dim]
+------------PhysicalUnion
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = catalog_sales.cs_sold_date_sk))
otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = catalog_sales.cs_item_sk)) otherCondition=()
build RFs:RF0 i_item_sk->[cs_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((catalog_sales.cs_item_sk = catalog_returns.cr_item_sk) and
(catalog_sales.cs_order_number = catalog_returns.cr_order_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[catalog_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Sports'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (2000, 2001))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF3 d_date_sk->[ss_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = store_sales.ss_item_sk)) otherCondition=()
build RFs:RF2 i_item_sk->[ss_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and
(store_sales.ss_ticket_number = store_returns.sr_ticket_number))
otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
RF3
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[store_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Sports'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (2000, 2001))
+----------------------------PhysicalOlapScan[date_dim]
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute[DistributionSpecHash]
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((date_dim.d_date_sk = web_sales.ws_sold_date_sk))
otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((item.i_item_sk = web_sales.ws_item_sk)) otherCondition=() build
RFs:RF4 i_item_sk->[ws_item_sk]
+----------------------------PhysicalProject
+------------------------------hashJoin[LEFT_OUTER_JOIN shuffle]
hashCondition=((web_sales.ws_item_sk = web_returns.wr_item_sk) and
(web_sales.ws_order_number = web_returns.wr_order_number)) otherCondition=()
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
RF5
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[web_returns]
+----------------------------PhysicalProject
+------------------------------filter((item.i_category = 'Sports'))
+--------------------------------PhysicalOlapScan[item]
+------------------------PhysicalProject
+--------------------------filter(d_year IN (2000, 2001))
+----------------------------PhysicalOlapScan[date_dim]
--PhysicalResultSink
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]