morrySnow commented on code in PR #16927: URL: https://github.com/apache/doris/pull/16927#discussion_r1117017858
########## fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java: ########## @@ -94,6 +97,9 @@ public PhysicalPlan visitPhysicalHashJoin(PhysicalHashJoin<? extends Plan, ? ext EqualTo equalTo = ((EqualTo) JoinUtils.swapEqualToForChildrenOrder( (EqualTo) join.getHashJoinConjuncts().get(i), join.left().getOutputSet())); for (TRuntimeFilterType type : legalTypes) { + if (type == TRuntimeFilterType.BITMAP) { + continue; + } Review Comment: add a comment to explain why continue ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -45,22 +52,52 @@ public class InApplyToJoin extends OneRewriteRuleFactory { @Override public Rule build() { return logicalApply().when(LogicalApply::isIn).then(apply -> { - Expression predicate; + if (needBitmapUnoin(apply)) { + /* + select t1.k1 from bigtable t1 where t1.k1 in (select t2.k2 from bitmap_table t2); + => + select t1.k1 from bigtable t1 where t1.k1 in (select bitmap_union(k2) from bitmap_table t2); + => + select t1.k1 from bigtable t1 left semi join (select bitmap_union(k2) x from bitmap_table ) t2 + on bitmap_contains(x, t1.k1); + */ + List<Expression> groupExpressions = Lists.newArrayList(); + Expression bitmapCol = apply.right().getOutput().get(0); + BitmapUnion union = new BitmapUnion(bitmapCol); + Alias alias = new Alias(union, union.toSql()); + List<NamedExpression> outputExpressions = Lists.newArrayList(alias); + + LogicalAggregate agg = new LogicalAggregate(groupExpressions, outputExpressions, apply.right()); + Expression compareExpr = ((InSubquery) apply.getSubqueryExpr()).getCompareExpr(); + if (!compareExpr.getDataType().isBigIntType()) { + //this rule is after type coercion, we need to add cast by hand + compareExpr = new Cast(compareExpr, BigIntType.INSTANCE); + } + Expression expr = new BitmapContains(agg.getOutput().get(0), compareExpr); + if (((InSubquery) apply.getSubqueryExpr()).isNot()) { + expr = new Not(expr); + } + return new LogicalJoin<>(JoinType.LEFT_SEMI_JOIN, Lists.newArrayList(), + Lists.newArrayList(expr), + JoinHint.NONE, + apply.left(), agg); + } + + //in-predicate to equal + Expression predicate = new EqualTo(((InSubquery) apply.getSubqueryExpr()).getCompareExpr(), + apply.right().getOutput().get(0)); Review Comment: why move else block outside? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java: ########## @@ -150,4 +150,9 @@ public boolean isSingleTableExpressionExtracted() { return singleTableExpressionExtracted; } + public Plan withConjuncts(Set<Expression> conjuncts) { + return new LogicalFilter(conjuncts, this.groupExpression, Review Comment: add comment to explain why reserve groupExpression ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/JoinCommute.java: ########## @@ -93,4 +99,21 @@ private static boolean containJoin(GroupPlan groupPlan) { List<Slot> output = groupPlan.getOutput(); return !output.stream().map(Slot::getQualifier).allMatch(output.get(0).getQualifier()::equals); } + + private boolean disableCommunicateForBitmapRuntimeFilter(LogicalJoin<GroupPlan, GroupPlan> join) { Review Comment: double negation is hard to understand. change function name to `couldCommunicateForBitmapRuntimeFilter` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -45,22 +52,52 @@ public class InApplyToJoin extends OneRewriteRuleFactory { @Override public Rule build() { return logicalApply().when(LogicalApply::isIn).then(apply -> { - Expression predicate; + if (needBitmapUnoin(apply)) { + /* + select t1.k1 from bigtable t1 where t1.k1 in (select t2.k2 from bitmap_table t2); + => + select t1.k1 from bigtable t1 where t1.k1 in (select bitmap_union(k2) from bitmap_table t2); + => + select t1.k1 from bigtable t1 left semi join (select bitmap_union(k2) x from bitmap_table ) t2 + on bitmap_contains(x, t1.k1); + */ + List<Expression> groupExpressions = Lists.newArrayList(); Review Comment: ```suggestion List<Expression> groupExpressions = ImmutableList.of(); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -77,4 +114,22 @@ public Rule build() { } }).toRule(RuleType.IN_APPLY_TO_JOIN); } + + private boolean needBitmapUnoin(LogicalApply<GroupPlan, GroupPlan> apply) { + return apply.right().getOutput().get(0).getDataType().isBitmapType() + && !((InSubquery) apply.getSubqueryExpr()).getCompareExpr().getDataType().isBitmapType(); + } Review Comment: so, we generate bitmap union for both in and not in? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -45,22 +52,52 @@ public class InApplyToJoin extends OneRewriteRuleFactory { @Override public Rule build() { return logicalApply().when(LogicalApply::isIn).then(apply -> { - Expression predicate; + if (needBitmapUnoin(apply)) { + /* + select t1.k1 from bigtable t1 where t1.k1 in (select t2.k2 from bitmap_table t2); + => + select t1.k1 from bigtable t1 where t1.k1 in (select bitmap_union(k2) from bitmap_table t2); + => + select t1.k1 from bigtable t1 left semi join (select bitmap_union(k2) x from bitmap_table ) t2 + on bitmap_contains(x, t1.k1); + */ + List<Expression> groupExpressions = Lists.newArrayList(); + Expression bitmapCol = apply.right().getOutput().get(0); + BitmapUnion union = new BitmapUnion(bitmapCol); + Alias alias = new Alias(union, union.toSql()); + List<NamedExpression> outputExpressions = Lists.newArrayList(alias); Review Comment: ```suggestion List<NamedExpression> outputExpressions = ImmutableList.of(alias); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java: ########## @@ -42,6 +44,13 @@ RIGHT_CHILD_TYPE extends Plan> extends AbstractPhysicalJoin<LEFT_CHILD_TYPE, RIGHT_CHILD_TYPE> { + /* + bitmap_contains(...) or Not(bitmap_contains(...)) can be used as bitmap runtime filter condition + bitmapRF is different from other RF in that scan node must wait for it. + if a condition is used in rf, it can be removed from join conditions. we collect these conditions here. + */ + private Set<Expression> bitMapRuntimeFilterConditions = Sets.newHashSet(); Review Comment: final ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java: ########## @@ -30,21 +30,37 @@ public class RuntimeFilter { private final RuntimeFilterId id; private final TRuntimeFilterType type; private final Expression srcSlot; + //bitmap filter support target expression like k1+1, abs(k1) + //targetExpression is an expression on targetSlot, in which there is only one non-const slot + private Expression targetExpression; private Slot targetSlot; private final int exprOrder; - private PhysicalHashJoin builderNode; + private AbstractPhysicalJoin builderNode; + + private boolean bitmapFilterNotIn; Review Comment: the attr name could be more generation such as `reverse` or `anti` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -45,22 +52,52 @@ public class InApplyToJoin extends OneRewriteRuleFactory { @Override public Rule build() { return logicalApply().when(LogicalApply::isIn).then(apply -> { - Expression predicate; + if (needBitmapUnoin(apply)) { + /* + select t1.k1 from bigtable t1 where t1.k1 in (select t2.k2 from bitmap_table t2); + => + select t1.k1 from bigtable t1 where t1.k1 in (select bitmap_union(k2) from bitmap_table t2); + => + select t1.k1 from bigtable t1 left semi join (select bitmap_union(k2) x from bitmap_table ) t2 + on bitmap_contains(x, t1.k1); Review Comment: add not in case ########## fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java: ########## @@ -1170,15 +1184,17 @@ public PlanFragment visitPhysicalNestedLoopJoin( nestedLoopJoinNode.setvIntermediateTupleDescList(Lists.newArrayList(intermediateDescriptor)); rightFragment.getPlanRoot().setCompactData(false); - if (needNewRootFragment) { - connectChildFragment(nestedLoopJoinNode, 0, joinFragment, leftFragment, context); - } else { - nestedLoopJoinNode.setChild(0, leftFragment.getPlanRoot()); - joinFragment.setPlanRoot(nestedLoopJoinNode); - } + connectChildFragment(nestedLoopJoinNode, 1, joinFragment, rightFragment, context); List<Expr> joinConjuncts = nestedLoopJoin.getOtherJoinConjuncts().stream() + .filter(e -> !nestedLoopJoin.isBitmapRuntimeFilterCondition(e)) Review Comment: why add this, as we discussion, we do not need to remove any conjunct? ########## regression-test/suites/query_p0/join/test_bitmap_filter_nereids.groovy: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_bitmap_filter_nereids") { + def tbl1 = "test_query_db.bigtable" + def tbl2 = "bitmap_table_nereids" + def tbl3 = "test_query_db.baseall" + + sql "set runtime_filter_type = 16" + + sql "DROP TABLE IF EXISTS ${tbl2}" + sql """ + CREATE TABLE ${tbl2} ( + `k1` int(11) NULL, + `k2` bitmap BITMAP_UNION NULL, + `k3` bitmap BITMAP_UNION NULL + ) ENGINE=OLAP + AGGREGATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + insert into ${tbl2} values + (1, bitmap_from_string('1, 3, 5, 7, 9, 11, 13, 99, 19910811, 20150402'), + bitmap_from_string('32767, 1985, 255, 789, 1991')), + (2, bitmap_from_string('10, 11, 12, 13, 14'), bitmap_empty());""" + + sql "set enable_nereids_planner=true;" + sql "set enable_fallback_to_original_planner=false;" + + qt_sql1 "select k1, k2 from ${tbl1} where k1 in (select k2 from ${tbl2}) order by k1;" Review Comment: add correlation subquery cases ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -45,22 +52,52 @@ public class InApplyToJoin extends OneRewriteRuleFactory { @Override public Rule build() { return logicalApply().when(LogicalApply::isIn).then(apply -> { - Expression predicate; + if (needBitmapUnoin(apply)) { + /* + select t1.k1 from bigtable t1 where t1.k1 in (select t2.k2 from bitmap_table t2); + => + select t1.k1 from bigtable t1 where t1.k1 in (select bitmap_union(k2) from bitmap_table t2); + => + select t1.k1 from bigtable t1 left semi join (select bitmap_union(k2) x from bitmap_table ) t2 + on bitmap_contains(x, t1.k1); + */ + List<Expression> groupExpressions = Lists.newArrayList(); + Expression bitmapCol = apply.right().getOutput().get(0); + BitmapUnion union = new BitmapUnion(bitmapCol); + Alias alias = new Alias(union, union.toSql()); + List<NamedExpression> outputExpressions = Lists.newArrayList(alias); + + LogicalAggregate agg = new LogicalAggregate(groupExpressions, outputExpressions, apply.right()); + Expression compareExpr = ((InSubquery) apply.getSubqueryExpr()).getCompareExpr(); + if (!compareExpr.getDataType().isBigIntType()) { + //this rule is after type coercion, we need to add cast by hand + compareExpr = new Cast(compareExpr, BigIntType.INSTANCE); + } + Expression expr = new BitmapContains(agg.getOutput().get(0), compareExpr); + if (((InSubquery) apply.getSubqueryExpr()).isNot()) { + expr = new Not(expr); + } + return new LogicalJoin<>(JoinType.LEFT_SEMI_JOIN, Lists.newArrayList(), + Lists.newArrayList(expr), + JoinHint.NONE, + apply.left(), agg); + } + + //in-predicate to equal + Expression predicate = new EqualTo(((InSubquery) apply.getSubqueryExpr()).getCompareExpr(), + apply.right().getOutput().get(0)); if (apply.isCorrelated()) { predicate = ExpressionUtils.and( new EqualTo(((InSubquery) apply.getSubqueryExpr()).getCompareExpr(), apply.right().getOutput().get(0)), apply.getCorrelationFilter().get()); - } else { - predicate = new EqualTo(((InSubquery) apply.getSubqueryExpr()).getCompareExpr(), - apply.right().getOutput().get(0)); } - - //TODO nereids should support bitmap runtime filter in future List<Expression> conjuncts = ExpressionUtils.extractConjunction(predicate); - if (conjuncts.stream().anyMatch(expression -> expression.children().stream() - .anyMatch(expr -> expr.getDataType() == BitmapType.INSTANCE))) { - throw new AnalysisException("nereids don't support bitmap runtime filter"); + if (predicate instanceof BitmapContains) { Review Comment: where generate BitmapContains? i think we handle all case in if(needBitmapUnion), so this code block could be removed? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java: ########## @@ -136,6 +139,44 @@ public static Pair<List<Expression>, List<Expression>> extractExpressionForHashT ); } + public static boolean hasBitmapSlot(Expression expr) { + return expr.getInputSlots().stream() + .anyMatch(slot -> slot.getDataType() == BitmapType.INSTANCE); + } Review Comment: not use any more ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java: ########## @@ -101,6 +101,7 @@ public enum RuleType { INFER_FILTER_NOT_NULL(RuleTypeClass.REWRITE), INFER_JOIN_NOT_NULL(RuleTypeClass.REWRITE), // subquery analyze + ANALYZE_FILTER_SUBQUERY_REWRITE_BITMAP_IN(RuleTypeClass.REWRITE), Review Comment: not use type ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/InApplyToJoin.java: ########## @@ -77,4 +114,22 @@ public Rule build() { } }).toRule(RuleType.IN_APPLY_TO_JOIN); } + + private boolean needBitmapUnoin(LogicalApply<GroupPlan, GroupPlan> apply) { + return apply.right().getOutput().get(0).getDataType().isBitmapType() + && !((InSubquery) apply.getSubqueryExpr()).getCompareExpr().getDataType().isBitmapType(); + } + + private Expression convertInPredicateToJoinConjunct(LogicalApply<GroupPlan, GroupPlan> apply) { Review Comment: this function is not use anymore ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java: ########## @@ -137,4 +146,16 @@ public PhysicalNestedLoopJoin<LEFT_CHILD_TYPE, RIGHT_CHILD_TYPE> withPhysicalPro hashJoinConjuncts, otherJoinConjuncts, Optional.empty(), getLogicalProperties(), physicalProperties, statsDeriveResult, left(), right()); } + + public void addBitmapRuntimeFilterCondition(Expression expr) { + bitMapRuntimeFilterConditions.add(expr); + } + + public boolean isBitmapRuntimeFilterCondition(Expression expr) { + return bitMapRuntimeFilterConditions.contains(expr); + } + + public boolean isBitMapRuntimeFilterConditionsEmpty() { + return bitMapRuntimeFilterConditions.isEmpty(); + } Review Comment: bitmap subquery filter could only convert to nested loop join? if subquery is correlation, should it be hash join? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/JoinUtils.java: ########## @@ -136,6 +139,44 @@ public static Pair<List<Expression>, List<Expression>> extractExpressionForHashT ); } + public static boolean hasBitmapSlot(Expression expr) { + return expr.getInputSlots().stream() + .anyMatch(slot -> slot.getDataType() == BitmapType.INSTANCE); + } + + /** + * This is used for bitmap runtime filter only. + * Extract bitmap_contains conjunct: + * like: bitmap_contains(a, b) and ..., Not(bitmap_contains(a, b)) and ..., + * where `a` and `b` are from right child and left child, respectively. + * + * @return condition for bitmap runtime filter: bitmap_contains + */ + public static List<Expression> extractBitmapRuntimeFilterConditions(List<Slot> leftSlots, + List<Slot> rightSlots, List<Expression> onConditions) { + List<Expression> result = Lists.newArrayList(); + for (Expression expr : onConditions) { + BitmapContains bitmapContains = null; + if (expr instanceof Not) { + List<Expression> notChildren = ExpressionUtils.extractConjunction(expr.child(0)); + if (notChildren.size() == 1 && notChildren.get(0) instanceof BitmapContains) { + bitmapContains = (BitmapContains) notChildren.get(0); + } + } else if (expr instanceof BitmapContains) { + bitmapContains = (BitmapContains) expr; + } + if (bitmapContains == null) { + continue; + } + //first child in right, second child in left + if (leftSlots.containsAll(bitmapContains.child(1).collect(Slot.class::isInstance)) + && rightSlots.containsAll(bitmapContains.child(0).collect(Slot.class::isInstance))) { + result.add(expr); + } Review Comment: add a `left contains child(0) && right contains child(1)` branch is more robustness -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org