englefly commented on code in PR #48808:
URL: https://github.com/apache/doris/pull/48808#discussion_r1995065336


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/HboStatsCalculator.java:
##########
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.trees.expressions.CTEId;
+import org.apache.doris.nereids.trees.plans.AbstractPlan;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
+import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
+import org.apache.doris.nereids.trees.plans.algebra.Filter;
+import org.apache.doris.nereids.trees.plans.algebra.Join;
+import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.Statistics;
+import org.apache.doris.statistics.hbo.PlanStatistics;
+import org.apache.doris.statistics.hbo.RecentRunsPlanStatistics;
+
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * StatsCalculator by using hbo plan stats. to do estimation.
+ */
+public class HboStatsCalculator extends StatsCalculator {
+    private final HboPlanStatisticsProvider hboPlanStatisticsProvider;
+
+    public HboStatsCalculator(GroupExpression groupExpression, boolean 
forbidUnknownColStats,
+            Map<String, ColumnStatistic> columnStatisticMap, boolean 
isPlayNereidsDump,
+            Map<CTEId, Statistics> cteIdToStats, CascadesContext context) {
+        super(groupExpression, forbidUnknownColStats, columnStatisticMap, 
isPlayNereidsDump,
+                cteIdToStats, context);
+        this.hboPlanStatisticsProvider = 
Objects.requireNonNull(Env.getCurrentEnv().getHboPlanStatisticsManager()
+                        .getHboPlanStatisticsProvider(), 
"HboPlanStatisticsProvider is null");
+    }
+
+    @Override
+    public void estimate() {
+        super.estimate();
+    }
+
+    /**
+     * NOTE: Can't override computeScan since the publishing side's plan hash 
of scan node
+     * use the scan's hbo string but embedding the filter info into the input 
table structure.
+     * if the matching logic here want to support filter node's hbo plan 
stats. reusing, it only needs
+     * to hook the computeFilter and use original scan's plan hash string, and 
also embedding the
+     * parent filter info into the scan node also.
+     * @param filter filter
+     * @return Statistics
+     */
+    @Override
+    protected Statistics computeFilter(Filter filter) {
+        Statistics legacyStats = super.computeFilter(filter);
+        boolean isLogicalFilterOnTs = 
HboUtils.isLogicalFilterOnLogicalScan(filter);
+        boolean isPhysicalFilterOnTs = 
HboUtils.isPhysicalFilterOnPhysicalScan(filter);
+        if (isLogicalFilterOnTs || isPhysicalFilterOnTs) {
+            AbstractPlan scanPlan = HboUtils.getScanUnderFilterNode(filter);
+            return getStatsFromHboPlanStats(scanPlan, legacyStats);

Review Comment:
   Why is getStatsFromHboPlanStats called with the scan plan rather than with the filter?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/HboUtils.java:
##########
@@ -0,0 +1,537 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.util.DebugUtil;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.plans.AbstractPlan;
+import org.apache.doris.nereids.trees.plans.GroupPlan;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
+import org.apache.doris.nereids.trees.plans.RelationId;
+import org.apache.doris.nereids.trees.plans.algebra.Filter;
+import org.apache.doris.nereids.trees.plans.logical.AbstractLogicalPlan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
+import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
+import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalPlan;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan;
+import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan;
+import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.statistics.hbo.InputTableStatisticsInfo;
+import org.apache.doris.statistics.hbo.PlanStatistics;
+import org.apache.doris.statistics.hbo.PlanStatisticsMatchStrategy;
+import org.apache.doris.statistics.hbo.PlanStatisticsWithInputInfo;
+import org.apache.doris.statistics.hbo.RecentRunsPlanStatistics;
+import org.apache.doris.statistics.hbo.RecentRunsPlanStatisticsEntry;
+import org.apache.doris.statistics.hbo.ScanPlanStatistics;
+import org.apache.doris.thrift.TPlanNodeRuntimeStatsItem;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.hash.Hashing;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Hbo utils.
+ */
+public class HboUtils {
+
+    /**
+     * Get accurate stats index
+     * @param recentRunsPlanStatistics recentRunsPlanStatistics
+     * @param inputTableStatistics inputTableStatistics
+     * @param rfSafeThreshold rfSafeThreshold
+     * @param isEnableHboNonStrictMatchingMode isEnableHboNonStrictMatchingMode
+     * @param strategy match strategy
+     * @return accurate stats index
+     */
+    public static Optional<Integer> getAccurateStatsIndex(
+            RecentRunsPlanStatistics recentRunsPlanStatistics,
+            List<PlanStatistics> inputTableStatistics, double rfSafeThreshold,
+            boolean isEnableHboNonStrictMatchingMode, 
PlanStatisticsMatchStrategy strategy) {
+        List<RecentRunsPlanStatisticsEntry> recentRunsStatistics = 
recentRunsPlanStatistics.getRecentRunsStatistics();
+        if (recentRunsStatistics.isEmpty()) {
+            return Optional.empty();
+        }
+
+        for (int recentRunsIndex = 0; recentRunsIndex < 
recentRunsStatistics.size(); ++recentRunsIndex) {
+            if (inputTableStatistics.size() != 
recentRunsStatistics.get(recentRunsIndex)
+                    .getInputTableStatistics().size()) {
+                continue;
+            }
+            boolean accurateMatch = true;
+            for (int inputTableIndex = 0; accurateMatch
+                    && inputTableIndex < inputTableStatistics.size(); 
++inputTableIndex) {
+                ScanPlanStatistics curInputStatistics = (ScanPlanStatistics) 
inputTableStatistics.get(inputTableIndex);
+                ScanPlanStatistics historicalInputStatistics =
+                        (ScanPlanStatistics) 
recentRunsStatistics.get(recentRunsIndex)
+                                
.getInputTableStatistics().get(inputTableIndex);
+                boolean isRFSafe = 
historicalInputStatistics.isRuntimeFilterSafeNode(rfSafeThreshold);
+                if (!isRFSafe) {
+                    accurateMatch = false;
+                } else if (!curInputStatistics.isPartitionedTable()
+                        && !historicalInputStatistics.isPartitionedTable()) {
+                    accurateMatch = 
canAccurateMatchForNonPartitionTable(curInputStatistics,
+                            historicalInputStatistics, strategy);
+                } else if (curInputStatistics.isPartitionedTable()
+                        && historicalInputStatistics.isPartitionedTable()) {
+                    // find the first full matching entry in recentRunEntries
+                    accurateMatch = 
canAccurateMatchForPartitionTable(curInputStatistics,
+                            historicalInputStatistics, 
isEnableHboNonStrictMatchingMode, strategy);
+                } else {
+                    throw new RuntimeException("unexpected state during hbo 
input table stats matching");
+                }
+            }
+            if (accurateMatch) {
+                return Optional.of(recentRunsIndex);
+            }
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Can accurate match for Non-Partition table
+     * @param currentInputStatistics currentInputStatistics
+     * @param recentRunsInputStatistics recentRunsInputStatistics
+     * @return can accurate match for Non-Partition table.
+     */
+    public static boolean 
canAccurateMatchForNonPartitionTable(ScanPlanStatistics currentInputStatistics,
+            ScanPlanStatistics recentRunsInputStatistics, 
PlanStatisticsMatchStrategy strategy) {
+        boolean hasSameOtherPredicate = 
currentInputStatistics.hasSameOtherPredicates(recentRunsInputStatistics);
+        if (strategy.equals(PlanStatisticsMatchStrategy.OTHER_ONLY_MATCH)
+                || strategy.equals(PlanStatisticsMatchStrategy.FULL_MATCH)
+                || 
strategy.equals(PlanStatisticsMatchStrategy.PARTITION_AND_OTHER_MATCH)) {
+            return hasSameOtherPredicate;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Can accurate match for partition table
+     * @param currentInputStatistics currentInputStatistics
+     * @param recentRunsInputStatistics historicalInputStatistics
+     * @param strategy strategy
+     * @return can accurate match for partition table
+     */
+    public static boolean canAccurateMatchForPartitionTable(ScanPlanStatistics 
currentInputStatistics,
+            ScanPlanStatistics recentRunsInputStatistics, boolean 
isEnableHboNonStrictMatchingMode,
+            PlanStatisticsMatchStrategy strategy) {
+        // For partition table, must ensure
+        // 1. the pruned partition is the same
+        // 2. partition column predicate is the exactly same(for single value 
partition it is not special,
+        // but for range partition, although the select partition id is the 
same, but the partition column
+        // filter may not be the same, which may have impact on the hbo cache 
matching)
+        // 3. the other predicate with the constant is the same
+        boolean hasSamePartition = 
currentInputStatistics.hasSamePartitionId(recentRunsInputStatistics);
+        boolean hasSamePartitionColumnPredicate = currentInputStatistics
+                .hasSamePartitionColumnPredicates(recentRunsInputStatistics);
+        boolean hasSameOtherPredicate = 
currentInputStatistics.hasSameOtherPredicates(recentRunsInputStatistics);
+        //boolean hasSimilarStats = true;
+        //hasSimilarStats = 
similarStats(currentInputStatistics.getOutputRows(),
+        // recentRunsInputStatistics.getOutputRows(), rowThreshold);
+        if (strategy.equals(PlanStatisticsMatchStrategy.FULL_MATCH)) {
+            return hasSamePartition && hasSamePartitionColumnPredicate && 
hasSameOtherPredicate;
+        } else if 
(strategy.equals(PlanStatisticsMatchStrategy.PARTITION_AND_OTHER_MATCH)) {
+            return hasSamePartition && hasSameOtherPredicate/* && 
hasSimilarStats*/;
+        } else if (isEnableHboNonStrictMatchingMode
+                && 
strategy.equals(PlanStatisticsMatchStrategy.PARTITION_ONLY_MATCH)) {
+            return hasSamePartition;
+        //} else if (needMatchSelectedPartition && 
needMatchPartitionColumnPredicate && !needMatchOtherPredicate) {
+        //    return hasSamePartition && hasSamePartitionColumnPredicate && 
hasSimilarStats;
+        //} else if (needMatchSelectedPartition && 
!needMatchPartitionColumnPredicate && !needMatchOtherPredicate) {
+        //    return hasSamePartition && hasSimilarStats;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Get similar stats index
+     * @param recentRunsPlanStatistics recentRunsPlanStatistics
+     * @param inputTableStatistics inputTableStatistics
+     * @param rowThreshold rowThreshold
+     * @param hboRfSafeThreshold hboRfSafeThreshold
+     * @return similar StatsIndex
+     */
+    public static Optional<Integer> getSimilarStatsIndex(
+            RecentRunsPlanStatistics recentRunsPlanStatistics,
+            List<PlanStatistics> inputTableStatistics,
+            double rowThreshold, double hboRfSafeThreshold) {
+        List<RecentRunsPlanStatisticsEntry> recentRunsStatistics = 
recentRunsPlanStatistics.getRecentRunsStatistics();
+        if (recentRunsStatistics.isEmpty()) {
+            return Optional.empty();
+        }
+
+        for (int recentRunsIndex = 0; recentRunsIndex < 
recentRunsStatistics.size(); ++recentRunsIndex) {
+            if (inputTableStatistics.size() != 
recentRunsStatistics.get(recentRunsIndex)
+                    .getInputTableStatistics().size()) {
+                continue;
+            }
+            boolean rowSimilarity = true;
+            for (int inputTablesIndex = 0; rowSimilarity
+                    && inputTablesIndex < inputTableStatistics.size(); 
++inputTablesIndex) {
+                PlanStatistics currentInputStatistics = 
inputTableStatistics.get(inputTablesIndex);
+                PlanStatistics historicalInputStatistics = 
recentRunsStatistics.get(recentRunsIndex)
+                        .getInputTableStatistics().get(inputTablesIndex);
+                // check if rf safe
+                boolean isRFSafe = 
historicalInputStatistics.isRuntimeFilterSafeNode(hboRfSafeThreshold);
+                if (!isRFSafe) {
+                    rowSimilarity = false;
+                } else {
+                    rowSimilarity = 
similarStats(currentInputStatistics.getOutputRows(),
+                            historicalInputStatistics.getOutputRows(), 
rowThreshold);
+                }
+            }
+            if (rowSimilarity) {
+                return Optional.of(recentRunsIndex);
+            }
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * similarStats
+     * @param stats1 stats1
+     * @param stats2 stats2
+     * @param threshold threshold
+     * @return similar Stats
+     */
+    public static boolean similarStats(double stats1, double stats2, double 
threshold) {
+        if (Double.isNaN(stats1) && Double.isNaN(stats2)) {
+            return true;
+        }
+        return stats1 >= (1 - threshold) * stats2 && stats1 <= (1 + threshold) 
* stats2;
+    }
+
+    /**
+     * Collect full qualifier of scan with relation id info, for 
distinguishing the same table in the query.
+     * This is mainly for pattern matching of hbo, supporting different join 
orders, such as a join b and b join a
+     * matching the same hbo stats. info.
+     * @param planNode planNode
+     * @param scanQualifierList scan qualifier list
+     */
+    public static void collectScanQualifierList(AbstractPlan planNode, 
List<String> scanQualifierList) {
+        if (planNode instanceof LogicalOlapScan) {
+            scanQualifierList.add(((LogicalOlapScan) 
planNode).getQualifierWithRelationId());
+        } else if (planNode instanceof PhysicalOlapScan) {
+            scanQualifierList.add(((PhysicalOlapScan) 
planNode).getQualifierWithRelationId());
+        } else if (planNode instanceof GroupPlan
+                && !((GroupPlan) 
planNode).getGroup().getLogicalExpressions().isEmpty()
+                && ((GroupPlan) planNode).getGroup()
+                .getLogicalExpressions().get(0).getPlan() instanceof 
AbstractLogicalPlan) {
+            Plan logicalPlan = ((GroupPlan) 
planNode).getGroup().getLogicalExpressions().get(0).getPlan();
+            collectScanQualifierList((AbstractPlan) logicalPlan, 
scanQualifierList);
+        } else if (planNode instanceof GroupPlan
+                && ((GroupPlan) 
planNode).getGroup().getLogicalExpressions().isEmpty()
+                && !((GroupPlan) 
planNode).getGroup().getPhysicalExpressions().isEmpty()
+                && ((GroupPlan) planNode).getGroup()
+                .getPhysicalExpressions().get(0).getPlan() instanceof 
AbstractPhysicalPlan) {
+            Plan physicalPlan = ((GroupPlan) 
planNode).getGroup().getPhysicalExpressions().get(0).getPlan();
+            collectScanQualifierList((AbstractPlan) physicalPlan, 
scanQualifierList);
+        } else {
+            for (Object child : planNode.children()) {
+                collectScanQualifierList((AbstractPlan) child, 
scanQualifierList);
+            }
+        }
+    }
+
+    /**
+     * Get plan fingerprint hash value
+     * @param planFingerprint plan fingerprint
+     * @return plan fingerprint hash value
+     */
+    public static String getPlanFingerprintHash(String planFingerprint) {
+        return Hashing.sha256().hashString(planFingerprint, 
StandardCharsets.UTF_8).toString();
+    }
+
+    /**
+     * Get matched hbo plan stats. entry.
+     * @param recentHboPlanStatistics recentHboPlanStatistics
+     * @param inputTableStatistics inputTableStatistics
+     * @param hboRfSafeThreshold hboRfSafeThreshold
+     * @param isEnableHboNonStrictMatchingMode isEnableHboNonStrictMatchingMode
+     * @return matched hbo plan stats. entry.
+     */
+    public static Optional<RecentRunsPlanStatisticsEntry> 
getMatchedHboPlanStatisticsEntry(
+            RecentRunsPlanStatistics recentHboPlanStatistics,
+            List<PlanStatistics> inputTableStatistics,
+            double hboRfSafeThreshold,
+            boolean isEnableHboNonStrictMatchingMode) {
+        List<RecentRunsPlanStatisticsEntry> recentRunsStatistics = 
recentHboPlanStatistics.getRecentRunsStatistics();
+        if (recentRunsStatistics.isEmpty()) {
+            return Optional.empty();
+        }
+
+        // TODO: if only non-partition table exists, the following steps may 
be redundant.
+        // MATCH 1: PlanStatisticsMatchStrategy.FULL_MATCH
+        Optional<Integer> accurateStatsIndex = HboUtils.getAccurateStatsIndex(
+                recentHboPlanStatistics, inputTableStatistics, 
hboRfSafeThreshold,
+                isEnableHboNonStrictMatchingMode, 
PlanStatisticsMatchStrategy.FULL_MATCH);
+        if (accurateStatsIndex.isPresent()) {
+            return 
Optional.of(recentRunsStatistics.get(accurateStatsIndex.get()));
+        }
+
+        // MATCH 2: PlanStatisticsMatchStrategy.PARTITION_AND_OTHER_MATCH
+        Optional<Integer> accurateStatsMatchPartitionIdAndOtherPredicateIndex 
= HboUtils.getAccurateStatsIndex(
+                recentHboPlanStatistics, inputTableStatistics, 
hboRfSafeThreshold,
+                isEnableHboNonStrictMatchingMode, 
PlanStatisticsMatchStrategy.PARTITION_AND_OTHER_MATCH);
+        if (accurateStatsMatchPartitionIdAndOtherPredicateIndex.isPresent()) {
+            return 
Optional.of(recentRunsStatistics.get(accurateStatsMatchPartitionIdAndOtherPredicateIndex.get()));
+        }
+
+        // MATCH 3: PlanStatisticsMatchStrategy.PARTITION_ONLY_MATCH
+        if (isEnableHboNonStrictMatchingMode) {
+            Optional<Integer> accurateStatsOnlyMatchPartitionIdIndex
+                    = HboUtils.getAccurateStatsIndex(
+                    recentHboPlanStatistics, inputTableStatistics, 
hboRfSafeThreshold,
+                    true, PlanStatisticsMatchStrategy.PARTITION_ONLY_MATCH);
+            if (accurateStatsOnlyMatchPartitionIdIndex.isPresent()) {
+                return 
Optional.of(recentRunsStatistics.get(accurateStatsOnlyMatchPartitionIdIndex.get()));
+            }
+
+            // MATCH 4: TODO: this option is actually useless since the 
inputTableStatistics is not exactly
+            // the current input, but actually a mocked one with the 
non-current row count.
+            //Optional<Integer> similarStatsIndex = 
HboUtils.getSimilarStatsIndex(
+            //        recentHboPlanStatistics, inputTableStatistics, 
historyMatchingThreshold, hboRfSafeThreshold);
+            //if (similarStatsIndex.isPresent()) {
+            //    return 
Optional.of(recentRunsStatistics.get(similarStatsIndex.get()));
+            //}
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * getPlanNodeHash
+     * @param planNode planNode
+     * @return planNode And Hash
+     */
+    public static PlanNodeAndHash getPlanNodeHash(AbstractPlan planNode) {
+        String planFingerprint;
+        String planHash;
+        if (planNode instanceof AbstractPhysicalPlan) {

Review Comment:
   Why is this branch needed? The logicalPlan branch and the physicalPlan branch are the same.
   Defining AbstractPlan.getPlanTreeFingerprint() would be better.



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/HboStatsCalculator.java:
##########
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.trees.expressions.CTEId;
+import org.apache.doris.nereids.trees.plans.AbstractPlan;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
+import org.apache.doris.nereids.trees.plans.algebra.Aggregate;
+import org.apache.doris.nereids.trees.plans.algebra.Filter;
+import org.apache.doris.nereids.trees.plans.algebra.Join;
+import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.Statistics;
+import org.apache.doris.statistics.hbo.PlanStatistics;
+import org.apache.doris.statistics.hbo.RecentRunsPlanStatistics;
+
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * StatsCalculator by using hbo plan stats. to do estimation.
+ */
+public class HboStatsCalculator extends StatsCalculator {
+    private final HboPlanStatisticsProvider hboPlanStatisticsProvider;

Review Comment:
   How is the skew problem solved?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java:
##########
@@ -111,6 +120,68 @@ public boolean canBind() {
         return true;
     }
 
+    public String getFingerprint() {
+        return "";
+    }
+
+    /**
+     * Get fingerprint of plan.
+     */
+    public String getPlanTreeFingerprint() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(this.getFingerprint());
+        if (this.children().isEmpty()) {
+            return builder.toString();
+        } else {
+            List<Plan> mutableChildren = new ArrayList<>(children);
+            // the following sorting by scans depended on will increase the 
plan matching possibility
+            // during hbo plan matching stage, e.g, the final physical plan is 
encoded as 'a join b',
+            // but the group plan is 'b join a' which can be matched also.
+            // NOTE: it will increase the risk brought from rf, as above, the 
physical plan 'a join b'
+            // is with rf's potential influence which will not exactly the 
same as 'b join a',
+            // but when we ignore the join sides as above and want to increase 
the hbo plan stats.'s adaptability,
+            // it may bring the unsuitable matching and increase the 
dependence for the rf-safe checking.
+            Collections.sort(mutableChildren, new Comparator<Plan>() {
+                @Override
+                public int compare(Plan plan1, Plan plan2) {
+                    List<String> scanQualifierList1 = new ArrayList<>();
+                    List<String> scanQualifierList2 = new ArrayList<>();
+                    HboUtils.collectScanQualifierList((AbstractPlan) plan1, 
scanQualifierList1);
+                    HboUtils.collectScanQualifierList((AbstractPlan) plan2, 
scanQualifierList2);
+                    Collections.sort(scanQualifierList1);
+                    Collections.sort(scanQualifierList2);
+                    String qualifiedName1 = 
Utils.qualifiedName(scanQualifierList1, "");
+                    String qualifiedName2 = 
Utils.qualifiedName(scanQualifierList2, "");
+                    return qualifiedName1.compareTo(qualifiedName2);
+                }
+            });
+            for (Plan plan : mutableChildren) {
+                if (plan instanceof GroupPlan) {
+                    builder.append(((GroupPlan) plan).getFingerprint());
+                } else if (plan instanceof AbstractLogicalPlan) {
+                    builder.append(((AbstractPlan) 
plan).getPlanTreeFingerprint());
+                } else if (plan instanceof AbstractPhysicalPlan) {
+                    if (!isLocalAggPhysicalNode((AbstractPhysicalPlan) plan)) {

Review Comment:
   why skip AGG?
   "group by A" and "group by A, B, C" are different



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java:
##########
@@ -111,6 +120,68 @@ public boolean canBind() {
         return true;
     }
 
+    public String getFingerprint() {
+        return "";
+    }
+
+    /**
+     * Get fingerprint of plan.
+     */
+    public String getPlanTreeFingerprint() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(this.getFingerprint());
+        if (this.children().isEmpty()) {
+            return builder.toString();
+        } else {
+            List<Plan> mutableChildren = new ArrayList<>(children);
+            // the following sorting by scans depended on will increase the 
plan matching possibility
+            // during hbo plan matching stage, e.g, the final physical plan is 
encoded as 'a join b',
+            // but the group plan is 'b join a' which can be matched also.
+            // NOTE: it will increase the risk brought from rf, as above, the 
physical plan 'a join b'
+            // is with rf's potential influence which will not exactly the 
same as 'b join a',
+            // but when we ignore the join sides as above and want to increase 
the hbo plan stats.'s adaptability,
+            // it may bring the unsuitable matching and increase the 
dependence for the rf-safe checking.
+            Collections.sort(mutableChildren, new Comparator<Plan>() {
+                @Override
+                public int compare(Plan plan1, Plan plan2) {
+                    List<String> scanQualifierList1 = new ArrayList<>();
+                    List<String> scanQualifierList2 = new ArrayList<>();
+                    HboUtils.collectScanQualifierList((AbstractPlan) plan1, 
scanQualifierList1);
+                    HboUtils.collectScanQualifierList((AbstractPlan) plan2, 
scanQualifierList2);
+                    Collections.sort(scanQualifierList1);
+                    Collections.sort(scanQualifierList2);
+                    String qualifiedName1 = 
Utils.qualifiedName(scanQualifierList1, "");
+                    String qualifiedName2 = 
Utils.qualifiedName(scanQualifierList2, "");
+                    return qualifiedName1.compareTo(qualifiedName2);
+                }
+            });
+            for (Plan plan : mutableChildren) {
+                if (plan instanceof GroupPlan) {
+                    builder.append(((GroupPlan) plan).getFingerprint());
+                } else if (plan instanceof AbstractLogicalPlan) {
+                    builder.append(((AbstractPlan) 
plan).getPlanTreeFingerprint());
+                } else if (plan instanceof AbstractPhysicalPlan) {
+                    if (!isLocalAggPhysicalNode((AbstractPhysicalPlan) plan)) {
+                        builder.append(((AbstractPlan) 
plan).getPlanTreeFingerprint());
+                    } else {
+                        builder.append(((AbstractPlan) 
plan.child(0)).getPlanTreeFingerprint());
+                    }
+                } else {
+                    throw new AnalysisException("illegal plan type 
getPlanTreeFingerprint");
+                }
+            }
+            return builder.toString();
+        }
+    }
+
+    public static boolean isLocalAggPhysicalNode(AbstractPhysicalPlan plan) {
+        if (plan instanceof PhysicalHashAggregate && 
((PhysicalHashAggregate<?>) plan).getAggPhase().isLocal()) {

Review Comment:
   how about 3-phase and 4-phase agg?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to