This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 76620c21aa [improvement](nereids) prune hash join output slot ids list 
(#20789)
76620c21aa is described below

commit 76620c21aa5a0086565ba5c52ad78f733583356d
Author: xzj7019 <131111794+xzj7...@users.noreply.github.com>
AuthorDate: Wed Jun 28 17:28:18 2023 +0800

    [improvement](nereids) prune hash join output slot ids list (#20789)
    
    1. Prune the hash join output slot id list based on the slot ids required
by the project and the other join conjuncts, to reduce the work on the BE side.
    2. Support pruning for semi/anti joins as well.
---
 .../glue/translator/PhysicalPlanTranslator.java    | 57 +++++++++++++++++-----
 .../org/apache/doris/planner/HashJoinNode.java     | 21 +++++++-
 2 files changed, 65 insertions(+), 13 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 1f504af02f..4aec102644 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1067,6 +1067,10 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                     sd = 
context.getDescTable().copySlotDescriptor(intermediateDescriptor, 
leftSlotDescriptor);
                 } else {
                     sd = context.createSlotDesc(intermediateDescriptor, sf);
+                    if (hashOutputSlotReferenceMap.get(sf.getExprId()) != 
null) {
+                        
hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId());
+                        
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), 
leftSlotDescriptor.getId());
+                    }
                 }
                 leftIntermediateSlotDescriptor.add(sd);
             }
@@ -1083,6 +1087,10 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                     sd = 
context.getDescTable().copySlotDescriptor(intermediateDescriptor, 
rightSlotDescriptor);
                 } else {
                     sd = context.createSlotDesc(intermediateDescriptor, sf);
+                    if (hashOutputSlotReferenceMap.get(sf.getExprId()) != 
null) {
+                        
hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId());
+                        
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), 
rightSlotDescriptor.getId());
+                    }
                 }
                 rightIntermediateSlotDescriptor.add(sd);
             }
@@ -1100,6 +1108,7 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                     sd = context.createSlotDesc(intermediateDescriptor, sf);
                     if (hashOutputSlotReferenceMap.get(sf.getExprId()) != 
null) {
                         
hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId());
+                        
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), 
leftSlotDescriptor.getId());
                     }
                 }
                 leftIntermediateSlotDescriptor.add(sd);
@@ -1117,6 +1126,7 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                     sd = context.createSlotDesc(intermediateDescriptor, sf);
                     if (hashOutputSlotReferenceMap.get(sf.getExprId()) != 
null) {
                         
hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId());
+                        
hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), 
rightSlotDescriptor.getId());
                     }
                 }
                 rightIntermediateSlotDescriptor.add(sd);
@@ -1124,8 +1134,15 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         }
 
         if (hashJoin.getMarkJoinSlotReference().isPresent()) {
-            
outputSlotReferences.add(hashJoin.getMarkJoinSlotReference().get());
-            context.createSlotDesc(intermediateDescriptor, 
hashJoin.getMarkJoinSlotReference().get());
+            SlotReference sf = hashJoin.getMarkJoinSlotReference().get();
+            outputSlotReferences.add(sf);
+            context.createSlotDesc(intermediateDescriptor, sf);
+            if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) {
+                SlotRef markJoinSlotId = context.findSlotRef(sf.getExprId());
+                Preconditions.checkState(markJoinSlotId != null);
+                
hashJoinNode.addSlotIdToHashOutputSlotIds(markJoinSlotId.getSlotId());
+                hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), 
markJoinSlotId.getSlotId());
+            }
         }
 
         // set slots as nullable for outer join
@@ -1390,6 +1407,15 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                 .map(NamedExpression::toSlot)
                 .collect(Collectors.toList());
 
+        List<Expr> predicateList = inputPlanNode.getConjuncts();
+        Set<SlotId> requiredSlotIdSet = Sets.newHashSet();
+        for (Expr expr : execExprList) {
+            extractExecSlot(expr, requiredSlotIdSet);
+        }
+        Set<SlotId> requiredByProjectSlotIdSet = 
Sets.newHashSet(requiredSlotIdSet);
+        for (Expr expr : predicateList) {
+            extractExecSlot(expr, requiredSlotIdSet);
+        }
         // For hash join node, use vSrcToOutputSMap to describe the expression 
calculation, use
         // vIntermediateTupleDescList as input, and set vOutputTupleDesc as 
the final output.
         // TODO: HashJoinNode's be implementation is not support projection 
yet, remove this after when supported.
@@ -1398,17 +1424,26 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
             JoinNodeBase hashJoinNode = (JoinNodeBase) inputPlanNode;
             hashJoinNode.setvOutputTupleDesc(tupleDescriptor);
             hashJoinNode.setvSrcToOutputSMap(execExprList);
+            // prune the hashOutputSlotIds
+            if (hashJoinNode instanceof HashJoinNode) {
+                ((HashJoinNode) hashJoinNode).getHashOutputSlotIds().clear();
+                Set<ExprId> requiredExprIds = Sets.newHashSet();
+                Set<SlotId> requiredOtherConjunctsSlotIdSet = 
Sets.newHashSet();
+                List<Expr> otherConjuncts = ((HashJoinNode) 
hashJoinNode).getOtherJoinConjuncts();
+                for (Expr expr : otherConjuncts) {
+                    extractExecSlot(expr, requiredOtherConjunctsSlotIdSet);
+                }
+                requiredOtherConjunctsSlotIdSet.forEach(e -> 
requiredExprIds.add(context.findExprId(e)));
+                requiredSlotIdSet.forEach(e -> 
requiredExprIds.add(context.findExprId(e)));
+                for (ExprId exprId : requiredExprIds) {
+                    SlotId slotId = ((HashJoinNode) 
hashJoinNode).getHashOutputExprSlotIdMap().get(exprId);
+                    Preconditions.checkState(slotId != null);
+                    ((HashJoinNode) 
hashJoinNode).addSlotIdToHashOutputSlotIds(slotId);
+                }
+            }
             return inputFragment;
         }
-        List<Expr> predicateList = inputPlanNode.getConjuncts();
-        Set<SlotId> requiredSlotIdSet = Sets.newHashSet();
-        for (Expr expr : execExprList) {
-            extractExecSlot(expr, requiredSlotIdSet);
-        }
-        Set<SlotId> requiredByProjectSlotIdSet = 
Sets.newHashSet(requiredSlotIdSet);
-        for (Expr expr : predicateList) {
-            extractExecSlot(expr, requiredSlotIdSet);
-        }
+
         if (inputPlanNode instanceof TableFunctionNode) {
             TableFunctionNode tableFunctionNode = (TableFunctionNode) 
inputPlanNode;
             
tableFunctionNode.setOutputSlotIds(Lists.newArrayList(requiredSlotIdSet));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 1f816db3df..02418fc01a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.CheckedMath;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
+import org.apache.doris.nereids.trees.expressions.ExprId;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.thrift.TEqJoinCondition;
 import org.apache.doris.thrift.TExplainLevel;
@@ -47,12 +48,14 @@ import org.apache.doris.thrift.TPlanNodeType;
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -76,7 +79,9 @@ public class HashJoinNode extends JoinNodeBase {
     private boolean isColocate = false; //the flag for colocate join
     private String colocateReason = ""; // if can not do colocate join, set 
reason here
 
-    private List<SlotId> hashOutputSlotIds = new ArrayList<>(); //init for 
nereids
+    private Set<SlotId> hashOutputSlotIds = Sets.newHashSet(); //init for 
nereids
+
+    private Map<ExprId, SlotId> hashOutputExprSlotIdMap = Maps.newHashMap();
 
     /**
      * Constructor of HashJoinNode.
@@ -230,7 +235,15 @@ public class HashJoinNode extends JoinNodeBase {
         Expr.getIds(otherJoinConjuncts, null, otherAndConjunctSlotIds);
         Expr.getIds(conjuncts, null, otherAndConjunctSlotIds);
         hashOutputSlotIdSet.addAll(otherAndConjunctSlotIds);
-        hashOutputSlotIds = new ArrayList<>(hashOutputSlotIdSet);
+        hashOutputSlotIds = new HashSet<>(hashOutputSlotIdSet);
+    }
+
+    public Map<ExprId, SlotId> getHashOutputExprSlotIdMap() {
+        return hashOutputExprSlotIdMap;
+    }
+
+    public Set<SlotId> getHashOutputSlotIds() {
+        return hashOutputSlotIds;
     }
 
     @Override
@@ -808,6 +821,10 @@ public class HashJoinNode extends JoinNodeBase {
         this.otherJoinConjuncts = otherJoinConjuncts;
     }
 
+    public List<Expr> getOtherJoinConjuncts() {
+        return otherJoinConjuncts;
+    }
+
     SlotRef getMappedInputSlotRef(SlotRef slotRef) {
         if (outputSmap != null) {
             Expr mappedExpr = outputSmap.mappingForRhsExpr(slotRef);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to