This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3eac53f75d [enhancement](Nereids) optimize bloom filter size reducing strategy (#18596) 3eac53f75d is described below commit 3eac53f75d5f3eb05e958403efeb7578ad86e438 Author: morrySnow <101034200+morrys...@users.noreply.github.com> AuthorDate: Mon Apr 17 10:50:08 2023 +0800 [enhancement](Nereids) optimize bloom filter size reducing strategy (#18596) --- .../glue/translator/RuntimeFilterTranslator.java | 12 +---------- .../processor/post/RuntimeFilterGenerator.java | 25 +++++++++++++++++++--- .../apache/doris/nereids/stats/StatsMathUtil.java | 22 +++++++++++++++++++ .../trees/plans/physical/RuntimeFilter.java | 22 ++++++++++++------- .../java/org/apache/doris/planner/PlanNode.java | 8 +++++++ .../org/apache/doris/planner/RuntimeFilter.java | 18 ++++++++++------ .../doris/planner/RuntimeFilterGenerator.java | 11 ---------- 7 files changed, 78 insertions(+), 40 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java index 6138596620..8816c9856b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java @@ -34,7 +34,6 @@ import org.apache.doris.planner.HashJoinNode; import org.apache.doris.planner.HashJoinNode.DistributionMode; import org.apache.doris.planner.JoinNodeBase; import org.apache.doris.planner.RuntimeFilter.RuntimeFilterTarget; -import org.apache.doris.planner.RuntimeFilterGenerator.FilterSizeLimits; import org.apache.doris.planner.ScanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.TRuntimeFilterType; @@ -126,20 +125,11 @@ public class RuntimeFilterTranslator { if (!src.getType().equals(target.getType()) && filter.getType() != TRuntimeFilterType.BITMAP) { targetExpr = new CastExpr(src.getType(), targetExpr); } - FilterSizeLimits filterSizeLimits = context.getLimits(); - if (node instanceof HashJoinNode - && !(((HashJoinNode) node).getDistributionMode() == DistributionMode.BROADCAST) - && ConnectContext.get() != null - && ConnectContext.get().getSessionVariable().enablePipelineEngine() - && ConnectContext.get().getSessionVariable().getParallelExecInstanceNum() > 0) { - filterSizeLimits = filterSizeLimits.adjustForParallel( - ConnectContext.get().getSessionVariable().getParallelExecInstanceNum()); - } org.apache.doris.planner.RuntimeFilter origFilter = org.apache.doris.planner.RuntimeFilter.fromNereidsRuntimeFilter( filter.getId(), node, src, filter.getExprOrder(), targetExpr, ImmutableMap.of(targetTupleId, ImmutableList.of(targetSlotId)), - filter.getType(), filterSizeLimits); + filter.getType(), context.getLimits(), filter.getBuildSideNdv()); if (node instanceof HashJoinNode) { origFilter.setIsBroadcast(((HashJoinNode) node).getDistributionMode() == DistributionMode.BROADCAST); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java index ba239a2e0d..77d3c92046 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.processor.post; import org.apache.doris.common.IdGenerator; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.stats.StatsMathUtil; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; @@ -27,6 +28,7 @@ import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapContains; +import org.apache.doris.nereids.trees.plans.AbstractPlan; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.ObjectId; import org.apache.doris.nereids.trees.plans.Plan; @@ -46,6 +48,7 @@ import com.google.common.collect.ImmutableSet; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -111,8 +114,24 @@ public class RuntimeFilterGenerator extends PlanPostProcessor { continue; } Slot olapScanSlot = aliasTransferMap.get(unwrappedSlot).second; + long buildSideNdv = -1L; + AbstractPlan right = (AbstractPlan) join.right(); + if (right.getStats() != null) { + List<Double> ndvs = join.getHashJoinConjuncts().stream() + .map(Expression::getInputSlots) + .flatMap(Set::stream) + .filter(s -> right.getOutputExprIdSet().contains(s.getExprId())) + .map(s -> right.getStats().columnStatistics().get(s)) + .filter(Objects::nonNull) + .map(cs -> cs.ndv) + .collect(Collectors.toList()); + buildSideNdv = (long) StatsMathUtil.jointNdv(ndvs); + if (buildSideNdv <= 0 || buildSideNdv > right.getStats().getRowCount()) { + buildSideNdv = (long) right.getStats().getRowCount(); + } + } RuntimeFilter filter = new RuntimeFilter(generator.getNextId(), - equalTo.right(), olapScanSlot, type, i, join); + equalTo.right(), olapScanSlot, type, i, join, buildSideNdv); ctx.addJoinToTargetMap(join, olapScanSlot.getExprId()); ctx.setTargetExprIdToFilter(olapScanSlot.getExprId(), filter); ctx.setTargetsOnScanNode(aliasTransferMap.get(unwrappedSlot).first, olapScanSlot); @@ -150,7 +169,7 @@ public class RuntimeFilterGenerator extends PlanPostProcessor { for (int i = 0; i < bitmapRFCount; i++) { Expression bitmapRuntimeFilterCondition = bitmapRuntimeFilterConditions.get(i); boolean isNot = bitmapRuntimeFilterCondition instanceof Not; - BitmapContains bitmapContains = null; + BitmapContains bitmapContains; if (bitmapRuntimeFilterCondition instanceof Not) { bitmapContains = (BitmapContains) bitmapRuntimeFilterCondition.child(0); } else { @@ -163,7 +182,7 @@ public class RuntimeFilterGenerator extends PlanPostProcessor { Slot olapScanSlot = aliasTransferMap.get(targetSlot).second; RuntimeFilter filter = new RuntimeFilter(generator.getNextId(), bitmapContains.child(0), olapScanSlot, - bitmapContains.child(1), type, i, join, isNot); + bitmapContains.child(1), type, i, join, isNot, -1L); ctx.addJoinToTargetMap(join, olapScanSlot.getExprId()); ctx.setTargetExprIdToFilter(olapScanSlot.getExprId(), filter); ctx.setTargetsOnScanNode(aliasTransferMap.get(targetSlot).first, olapScanSlot); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java index d70893ab96..24405c2c43 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java @@ -17,6 +17,11 @@ package org.apache.doris.nereids.stats; +import org.apache.commons.collections.CollectionUtils; + +import java.util.Collections; +import java.util.List; + /** * Math util for statistics derivation */ @@ -59,4 +64,21 @@ public class StatsMathUtil { return a / nonZeroDivisor(b); } + /** + * compute the multi columns unite ndv + */ + public static double jointNdv(List<Double> ndvs) { + if (CollectionUtils.isEmpty(ndvs)) { + return -1; + } + if (ndvs.stream().anyMatch(n -> n <= 0)) { + return -1; + } + ndvs.sort(Collections.reverseOrder()); + double multiNdv = 1; + for (int i = 0; i < ndvs.size(); i++) { + multiNdv = multiNdv * Math.pow(ndvs.get(i), 1 / Math.pow(2, i)); + } + return multiNdv; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java index 239b9d6737..12a545a22f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java @@ -32,27 +32,29 @@ public class RuntimeFilter { private final Expression srcSlot; //bitmap filter support target expression like k1+1, abs(k1) //targetExpression is an expression on targetSlot, in which there is only one non-const slot - private Expression targetExpression; - private Slot targetSlot; + private final Expression targetExpression; + private final Slot targetSlot; private final int exprOrder; - private AbstractPhysicalJoin builderNode; + private final AbstractPhysicalJoin builderNode; - private boolean bitmapFilterNotIn; + private final boolean bitmapFilterNotIn; + + private final long buildSideNdv; /** * constructor */ public RuntimeFilter(RuntimeFilterId id, Expression src, Slot target, TRuntimeFilterType type, - int exprOrder, AbstractPhysicalJoin builderNode) { - this(id, src, target, target, type, exprOrder, builderNode, false); + int exprOrder, AbstractPhysicalJoin builderNode, long buildSideNdv) { + this(id, src, target, target, type, exprOrder, builderNode, false, buildSideNdv); } /** * constructor */ public RuntimeFilter(RuntimeFilterId id, Expression src, Slot target, Expression targetExpression, - TRuntimeFilterType type, - int exprOrder, AbstractPhysicalJoin builderNode, boolean bitmapFilterNotIn) { + TRuntimeFilterType type, int exprOrder, AbstractPhysicalJoin builderNode, boolean bitmapFilterNotIn, + long buildSideNdv) { this.id = id; this.srcSlot = src; this.targetSlot = target; @@ -61,6 +63,7 @@ public class RuntimeFilter { this.exprOrder = exprOrder; this.builderNode = builderNode; this.bitmapFilterNotIn = bitmapFilterNotIn; + this.buildSideNdv = buildSideNdv <= 0 ? -1L : buildSideNdv; } public Expression getSrcExpr() { @@ -95,4 +98,7 @@ public class RuntimeFilter { return targetExpression; } + public long getBuildSideNdv() { + return buildSideNdv; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 4e3412b760..7ab8963de3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -317,6 +317,14 @@ public abstract class PlanNode extends TreeNode<PlanNode> implements PlanStats { return cardinality; } + public long getCardinalityAfterFilter() { + if (cardinalityAfterFilter < 0) { + return cardinality; + } else { + return cardinalityAfterFilter; + } + } + public int getNumNodes() { return numNodes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java index 58d0086ed6..2a1f13b9c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java @@ -134,8 +134,8 @@ public final class RuntimeFilter { } private RuntimeFilter(RuntimeFilterId filterId, PlanNode filterSrcNode, Expr srcExpr, int exprOrder, - Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots, - TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits) { + Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots, TRuntimeFilterType type, + RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits, long buildSizeNdv) { this.id = filterId; this.builderNode = filterSrcNode; this.srcExpr = srcExpr; @@ -143,6 +143,7 @@ public final class RuntimeFilter { this.origTargetExpr = origTargetExpr; this.targetSlotsByTid = targetSlots; this.runtimeFilterType = type; + this.ndvEstimate = buildSizeNdv; computeNdvEstimate(); calculateFilterSize(filterSizeLimits); } @@ -150,8 +151,9 @@ public final class RuntimeFilter { // only for nereids planner public static RuntimeFilter fromNereidsRuntimeFilter(RuntimeFilterId id, JoinNodeBase node, Expr srcExpr, int exprOrder, Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots, - TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits) { - return new RuntimeFilter(id, node, srcExpr, exprOrder, origTargetExpr, targetSlots, type, filterSizeLimits); + TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits, long buildSizeNdv) { + return new RuntimeFilter(id, node, srcExpr, exprOrder, origTargetExpr, + targetSlots, type, filterSizeLimits, buildSizeNdv); } @Override @@ -306,7 +308,7 @@ public final class RuntimeFilter { } return new RuntimeFilter(idGen.getNextId(), filterSrcNode, srcExpr, exprOrder, - targetExpr, targetSlots, type, filterSizeLimits); + targetExpr, targetSlots, type, filterSizeLimits, -1L); } public static RuntimeFilter create(IdGenerator<RuntimeFilterId> idGen, Analyzer analyzer, Expr joinPredicate, @@ -343,7 +345,7 @@ public final class RuntimeFilter { RuntimeFilter runtimeFilter = new RuntimeFilter(idGen.getNextId(), filterSrcNode, srcExpr, exprOrder, targetExpr, targetSlots, - type, filterSizeLimits); + type, filterSizeLimits, -1L); runtimeFilter.setBitmapFilterNotIn(((BitmapFilterPredicate) joinPredicate).isNotIn()); return runtimeFilter; } @@ -515,7 +517,9 @@ public final class RuntimeFilter { } public void computeNdvEstimate() { - ndvEstimate = builderNode.getChild(1).getCardinality(); + if (ndvEstimate < 0) { + ndvEstimate = builderNode.getChild(1).getCardinalityAfterFilter(); + } } public void extractTargetsPosition() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java index 2d4e57702c..a88f5d73bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java @@ -117,17 +117,6 @@ public final class RuntimeFilterGenerator { defaultValue = Math.max(defaultValue, minVal); defaultVal = BitUtil.roundUpToPowerOf2(Math.min(defaultValue, maxVal)); } - - private FilterSizeLimits(long maxVal, long minVal, long defaultVal) { - this.maxVal = BitUtil.roundUpToPowerOf2(maxVal); - this.minVal = BitUtil.roundUpToPowerOf2(minVal); - defaultVal = Math.max(defaultVal, this.minVal); - this.defaultVal = BitUtil.roundUpToPowerOf2(Math.min(defaultVal, this.maxVal)); - } - - public FilterSizeLimits adjustForParallel(int parallel) { - return new FilterSizeLimits(maxVal / parallel, minVal / parallel, defaultVal / parallel); - } } // Contains size limits for bloom filters. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org