Kikyou1997 commented on code in PR #11179:
URL: https://github.com/apache/doris/pull/11179#discussion_r929637321


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/stats/HashJoinEstimation.java:
##########
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.common.CheckedMath;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.plans.JoinType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.base.Preconditions;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Estimate hash join stats.
+ * TODO: Update other props in the ColumnStats properly.
+ */
+public class HashJoinEstimation {
+
+    /**
+     * Do estimate.
+     */
+    public static StatsDeriveResult estimate(StatsDeriveResult leftStats, 
StatsDeriveResult rightStats,
+            Expression eqCondition, JoinType joinType) {
+        StatsDeriveResult statsDeriveResult = new StatsDeriveResult(leftStats);
+        statsDeriveResult.merge(rightStats);
+        List<Expression> eqConjunctList = 
ExpressionUtils.extractConjunct(eqCondition);
+        long rowCount = -1;
+        if (joinType.isSemiOrAntiJoin()) {
+            rowCount = getSemiJoinRowCount(leftStats, rightStats, 
eqConjunctList, joinType);
+        } else if (joinType.isInnerJoin() || joinType.isOuterJoin()) {
+            rowCount = getJoinRowCount(leftStats, rightStats, eqConjunctList, 
joinType);
+        } else {
+            throw new RuntimeException("joinType is not supported");
+        }
+        statsDeriveResult.setRowCount(rowCount);
+        return statsDeriveResult;
+    }
+
+    private static long getSemiJoinRowCount(StatsDeriveResult leftStats, 
StatsDeriveResult rightStats,
+            List<Expression> eqConjunctList, JoinType joinType) {
+        long rowCount;
+        if (JoinType.RIGHT_SEMI_JOIN.equals(joinType) || 
JoinType.RIGHT_ANTI_JOIN.equals(joinType)) {
+            if (rightStats.getRowCount() == -1) {
+                return -1;
+            }
+            rowCount = rightStats.getRowCount();
+        } else {
+            if (leftStats.getRowCount() == -1) {
+                return -1;
+            }
+            rowCount = leftStats.getRowCount();
+        }
+        Map<Slot, ColumnStats> leftSlotToColStats = 
leftStats.getSlotRefToColumnStatsMap();
+        Map<Slot, ColumnStats> rightSlotToColStats = 
rightStats.getSlotRefToColumnStatsMap();
+        double minSelectivity = 1.0;
+        for (Expression eqJoinPredicate : eqConjunctList) {
+            long lhsNdv = 
leftSlotToColStats.get(eqJoinPredicate.child(0)).getNdv();
+            lhsNdv = Math.min(lhsNdv, leftStats.getRowCount());
+            long rhsNdv = 
rightSlotToColStats.get(eqJoinPredicate.child(1)).getNdv();
+            rhsNdv = Math.min(rhsNdv, rightStats.getRowCount());
+            // Skip conjuncts with unknown NDV on either side.
+            if (lhsNdv == -1 || rhsNdv == -1) {
+                continue;
+            }
+            // TODO: Do we need NULL_AWARE_LEFT_ANTI_JOIN type as stale 
optimizer?
+            double selectivity = 1.0;
+            switch (joinType) {
+                case LEFT_SEMI_JOIN: {
+                    selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) 
(lhsNdv);
+                    break;
+                }
+                case RIGHT_SEMI_JOIN: {
+                    selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) 
(rhsNdv);
+                    break;
+                }
+                case LEFT_ANTI_JOIN:
+                    selectivity = (double) (lhsNdv > rhsNdv ? (lhsNdv - 
rhsNdv) : lhsNdv) / (double) lhsNdv;
+                    break;
+                case RIGHT_ANTI_JOIN: {
+                    selectivity = (double) (rhsNdv > lhsNdv ? (rhsNdv - 
lhsNdv) : rhsNdv) / (double) rhsNdv;
+                    break;
+                }
+                default:
+                    throw new RuntimeException("joinType is not supported");
+            }
+            minSelectivity = Math.min(minSelectivity, selectivity);
+        }
+        Preconditions.checkState(rowCount != -1);
+        return Math.round(rowCount * minSelectivity);
+    }
+
+    private static long getJoinRowCount(StatsDeriveResult leftStats, 
StatsDeriveResult rightStats,
+            List<Expression> eqConjunctList, JoinType joinType) {
+        long lhsCard = leftStats.getRowCount();
+        long rhsCard = rightStats.getRowCount();
+        Map<Slot, ColumnStats> leftSlotToColumnStats = 
leftStats.getSlotRefToColumnStatsMap();
+        Map<Slot, ColumnStats> rightSlotToColumnStats = 
rightStats.getSlotRefToColumnStatsMap();
+        if (lhsCard == -1 || rhsCard == -1) {
+            return lhsCard;
+        }
+        long result = -1;
+        for (Expression eqJoinConjunct : eqConjunctList) {
+            Expression left = eqJoinConjunct.child(0);
+            if (!(left instanceof SlotReference)) {

Review Comment:
   We will support such cases in the future; I guess there is no such join 
condition in TPC-H.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to