This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 76620c21aa [improvement](nereids) prune hash join output slot ids list (#20789) 76620c21aa is described below commit 76620c21aa5a0086565ba5c52ad78f733583356d Author: xzj7019 <131111794+xzj7...@users.noreply.github.com> AuthorDate: Wed Jun 28 17:28:18 2023 +0800 [improvement](nereids) prune hash join output slot ids list (#20789) 1. prune hash join output slot ids list based on slot ids in required project and other conjunctions, to reduce the be side effort. 2. support pruning for semi/anti also --- .../glue/translator/PhysicalPlanTranslator.java | 57 +++++++++++++++++----- .../org/apache/doris/planner/HashJoinNode.java | 21 +++++++- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 1f504af02f..4aec102644 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1067,6 +1067,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla sd = context.getDescTable().copySlotDescriptor(intermediateDescriptor, leftSlotDescriptor); } else { sd = context.createSlotDesc(intermediateDescriptor, sf); + if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) { + hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId()); + hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), leftSlotDescriptor.getId()); + } } leftIntermediateSlotDescriptor.add(sd); } @@ -1083,6 +1087,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla sd = context.getDescTable().copySlotDescriptor(intermediateDescriptor, rightSlotDescriptor); } else { sd = context.createSlotDesc(intermediateDescriptor, sf); + if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) { + hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId()); + hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), rightSlotDescriptor.getId()); + } } rightIntermediateSlotDescriptor.add(sd); } @@ -1100,6 +1108,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla sd = context.createSlotDesc(intermediateDescriptor, sf); if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) { hashJoinNode.addSlotIdToHashOutputSlotIds(leftSlotDescriptor.getId()); + hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), leftSlotDescriptor.getId()); } } leftIntermediateSlotDescriptor.add(sd); @@ -1117,6 +1126,7 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla sd = context.createSlotDesc(intermediateDescriptor, sf); if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) { hashJoinNode.addSlotIdToHashOutputSlotIds(rightSlotDescriptor.getId()); + hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), rightSlotDescriptor.getId()); } } rightIntermediateSlotDescriptor.add(sd); @@ -1124,8 +1134,15 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } if (hashJoin.getMarkJoinSlotReference().isPresent()) { - outputSlotReferences.add(hashJoin.getMarkJoinSlotReference().get()); - context.createSlotDesc(intermediateDescriptor, hashJoin.getMarkJoinSlotReference().get()); + SlotReference sf = hashJoin.getMarkJoinSlotReference().get(); + outputSlotReferences.add(sf); + context.createSlotDesc(intermediateDescriptor, sf); + if (hashOutputSlotReferenceMap.get(sf.getExprId()) != null) { + SlotRef markJoinSlotId = context.findSlotRef(sf.getExprId()); + Preconditions.checkState(markJoinSlotId != null); + hashJoinNode.addSlotIdToHashOutputSlotIds(markJoinSlotId.getSlotId()); + hashJoinNode.getHashOutputExprSlotIdMap().put(sf.getExprId(), markJoinSlotId.getSlotId()); + } } // set slots as nullable for outer join @@ -1390,6 +1407,15 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla .map(NamedExpression::toSlot) .collect(Collectors.toList()); + List<Expr> predicateList = inputPlanNode.getConjuncts(); + Set<SlotId> requiredSlotIdSet = Sets.newHashSet(); + for (Expr expr : execExprList) { + extractExecSlot(expr, requiredSlotIdSet); + } + Set<SlotId> requiredByProjectSlotIdSet = Sets.newHashSet(requiredSlotIdSet); + for (Expr expr : predicateList) { + extractExecSlot(expr, requiredSlotIdSet); + } // For hash join node, use vSrcToOutputSMap to describe the expression calculation, use // vIntermediateTupleDescList as input, and set vOutputTupleDesc as the final output. // TODO: HashJoinNode's be implementation is not support projection yet, remove this after when supported. @@ -1398,17 +1424,26 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla JoinNodeBase hashJoinNode = (JoinNodeBase) inputPlanNode; hashJoinNode.setvOutputTupleDesc(tupleDescriptor); hashJoinNode.setvSrcToOutputSMap(execExprList); + // prune the hashOutputSlotIds + if (hashJoinNode instanceof HashJoinNode) { + ((HashJoinNode) hashJoinNode).getHashOutputSlotIds().clear(); + Set<ExprId> requiredExprIds = Sets.newHashSet(); + Set<SlotId> requiredOtherConjunctsSlotIdSet = Sets.newHashSet(); + List<Expr> otherConjuncts = ((HashJoinNode) hashJoinNode).getOtherJoinConjuncts(); + for (Expr expr : otherConjuncts) { + extractExecSlot(expr, requiredOtherConjunctsSlotIdSet); + } + requiredOtherConjunctsSlotIdSet.forEach(e -> requiredExprIds.add(context.findExprId(e))); + requiredSlotIdSet.forEach(e -> requiredExprIds.add(context.findExprId(e))); + for (ExprId exprId : requiredExprIds) { + SlotId slotId = ((HashJoinNode) hashJoinNode).getHashOutputExprSlotIdMap().get(exprId); + Preconditions.checkState(slotId != null); + ((HashJoinNode) hashJoinNode).addSlotIdToHashOutputSlotIds(slotId); + } + } return inputFragment; } - List<Expr> predicateList = inputPlanNode.getConjuncts(); - Set<SlotId> requiredSlotIdSet = Sets.newHashSet(); - for (Expr expr : execExprList) { - extractExecSlot(expr, requiredSlotIdSet); - } - Set<SlotId> requiredByProjectSlotIdSet = Sets.newHashSet(requiredSlotIdSet); - for (Expr expr : predicateList) { - extractExecSlot(expr, requiredSlotIdSet); - } + if (inputPlanNode instanceof TableFunctionNode) { TableFunctionNode tableFunctionNode = (TableFunctionNode) inputPlanNode; tableFunctionNode.setOutputSlotIds(Lists.newArrayList(requiredSlotIdSet)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java index 1f816db3df..02418fc01a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.CheckedMath; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; +import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TEqJoinCondition; import org.apache.doris.thrift.TExplainLevel; @@ -47,12 +48,14 @@ import org.apache.doris.thrift.TPlanNodeType; import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -76,7 +79,9 @@ public class HashJoinNode extends JoinNodeBase { private boolean isColocate = false; //the flag for colocate join private String colocateReason = ""; // if can not do colocate join, set reason here - private List<SlotId> hashOutputSlotIds = new ArrayList<>(); //init for nereids + private Set<SlotId> hashOutputSlotIds = Sets.newHashSet(); //init for nereids + + private Map<ExprId, SlotId> hashOutputExprSlotIdMap = Maps.newHashMap(); /** * Constructor of HashJoinNode. @@ -230,7 +235,15 @@ public class HashJoinNode extends JoinNodeBase { Expr.getIds(otherJoinConjuncts, null, otherAndConjunctSlotIds); Expr.getIds(conjuncts, null, otherAndConjunctSlotIds); hashOutputSlotIdSet.addAll(otherAndConjunctSlotIds); - hashOutputSlotIds = new ArrayList<>(hashOutputSlotIdSet); + hashOutputSlotIds = new HashSet<>(hashOutputSlotIdSet); + } + + public Map<ExprId, SlotId> getHashOutputExprSlotIdMap() { + return hashOutputExprSlotIdMap; + } + + public Set<SlotId> getHashOutputSlotIds() { + return hashOutputSlotIds; } @Override @@ -808,6 +821,10 @@ public class HashJoinNode extends JoinNodeBase { this.otherJoinConjuncts = otherJoinConjuncts; } + public List<Expr> getOtherJoinConjuncts() { + return otherJoinConjuncts; + } + SlotRef getMappedInputSlotRef(SlotRef slotRef) { if (outputSmap != null) { Expr mappedExpr = outputSmap.mappingForRhsExpr(slotRef); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org