morrySnow commented on code in PR #19288: URL: https://github.com/apache/doris/pull/19288#discussion_r1188234618
########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME Review Comment: add more comments to explain fix what ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); Review Comment: i think should be allMatch? ```suggestion return plans.stream().allMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java: ########## @@ -19,49 +19,95 @@ import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.PlanTypeUtils; import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; /** * Subquery Expression. */ public abstract class SubqueryExpr extends Expression { + protected final LogicalPlan queryPlan; protected final List<Slot> correlateSlots; - protected final Optional<Expression> typeCoercionExpr; + protected final Optional<Expression> subQueryCombinedHavingExpr; + protected final boolean isSpj; public SubqueryExpr(LogicalPlan subquery) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.of(); this.typeCoercionExpr = Optional.empty(); + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); } public SubqueryExpr(LogicalPlan subquery, List<Slot> correlateSlots, Optional<Expression> typeCoercionExpr) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.copyOf(correlateSlots); this.typeCoercionExpr = typeCoercionExpr; + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); + } + + public SubqueryExpr(LogicalPlan subquery, List<Slot> correlateSlots, Optional<Expression> typeCoercionExpr, + Optional<Expression> subQueryCombinedHavingExpr) { + this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); + this.correlateSlots = ImmutableList.copyOf(correlateSlots); + this.typeCoercionExpr = typeCoercionExpr; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + this.isSpj = PlanTypeUtils.isSpj(queryPlan); } public List<Slot> getCorrelateSlots() { return correlateSlots; } + public List<Expression> getCorrelateExpressions() { + List<Expression> expressionList = new ArrayList<>(); + expressionList.addAll(correlateSlots); + return expressionList; + } + public Optional<Expression> getTypeCoercionExpr() { return typeCoercionExpr; } + public Optional<Expression> getSubQueryCombinedHavingExpr() { + return subQueryCombinedHavingExpr; + } + public Expression getSubqueryOutput() { return typeCoercionExpr.orElseGet(() -> queryPlan.getOutput().get(0)); } + public boolean isSpj() { + return this.isSpj; + } + + /** + * Get spj related predicates. If not spj, return empty set. + */ + public Set<Expression> getSpjPredicate() { + if (!isSpj) { + return new HashSet<>(); Review Comment: Collections.emptySet(); ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static boolean isSupportedPlan(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSupportedPlan(plans); + } + + public static boolean isSupportedPlan(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SUPPORTED_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static LogicalFilter getSpjFilter(LogicalPlan rootPlan) { Review Comment: if this method named `getSpjFilter`, it should determine whether SPJ in itself. Or rename this method to `getFilter` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java: ########## @@ -121,6 +134,27 @@ private LogicalJoin( RIGHT_CHILD_TYPE rightChild) { super(PlanType.LOGICAL_JOIN, groupExpression, logicalProperties, leftChild, rightChild); this.joinType = Objects.requireNonNull(joinType, "joinType can not be null"); + this.subQueryCombinedHavingExpr = Optional.empty(); + this.hashJoinConjuncts = ImmutableList.copyOf(hashJoinConjuncts); + this.otherJoinConjuncts = ImmutableList.copyOf(otherJoinConjuncts); + this.hint = Objects.requireNonNull(hint, "hint can not be null"); + this.markJoinSlotReference = markJoinSlotReference; Review Comment: use this(...) to avoid redundant code ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java: ########## @@ -191,9 +193,15 @@ private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, ctx.setSubqueryExprIsAnalyzed(subquery, true); boolean needAddSubOutputToProjects = isScalarAndFilterContainsSubqueryOutput( subquery, conjunct, isProject, singleSubquery); + Optional<Expression> subQueryCombinedHavingExpr = subquery.getSubQueryCombinedHavingExpr(); + boolean needAddSubqCombinedHaving = subQueryCombinedHavingExpr.isPresent(); + Set<SlotReference> extractedSlotsInHaving = new HashSet<>(); + if (needAddSubqCombinedHaving) { + extractedSlotsInHaving = ExpressionUtils.extractSlotToSet(subQueryCombinedHavingExpr.get()); Review Comment: ```java Set<SlotReference> extractedSlotsInHaving = subquery.getSubQueryCombinedHavingExpr() .map(ExpressionUtils::extractSlotToSet) .orElse(ImmutableSet.of()); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryCombine.java: ########## @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.CaseWhen; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Exists; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.WhenClause; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.SubqueryContainmentChecker; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Combine subQueries having containment relationship. + * Only support exists-notexists currently, the rest types will + * be supported in the future. + * Will put this rule into cost based transformation framework. + */ +public class SubqueryCombine extends DefaultPlanRewriter<JobContext> implements CustomRewriter { + + /** + * Types of subquery combinations. + */ + public enum CombinePattern { + EXISTS_NOTEXISTS_COMBINE, + EXISTS_EXISTS_COMBINE, + NOTEXISTS_NOTEXISTS_COMBINE, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + boolean preCheck = doPreCheck((LogicalPlan) plan); + return preCheck ? plan.accept(this, jobContext) : plan; + } + + @Override + public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, JobContext context) { + CombinePattern pattern = getCombinePattern(filter); + LogicalFilter newFilter = filter; + switch (pattern) { + case EXISTS_NOTEXISTS_COMBINE: + Set<Expression> newConjuncts = new HashSet<>(); + Set<Expression> otherConjuncts = new HashSet<>(); + SubqueryExpr existsExpr = null; + SubqueryExpr notExistsExpr = null; + SubqueryContainmentChecker checker = new SubqueryContainmentChecker(); + for (Expression expression : filter.getConjuncts()) { + if (isExists(expression)) { + existsExpr = (SubqueryExpr) expression; + } else if (isNotExists(expression)) { + notExistsExpr = (SubqueryExpr) expression; + } else { + otherConjuncts.add(expression); + } + } + boolean isValidForCombine = checkValidForSuqueryCombine(existsExpr, notExistsExpr, pattern, checker); + if (isValidForCombine) { + Exists newExists = doSubqueryCombine(existsExpr, checker); + newConjuncts.addAll(otherConjuncts); + newConjuncts.add(newExists); + newFilter = new LogicalFilter<>(newConjuncts, filter.child()); Review Comment: add `withConjuncts` to LogicalFilter and use it here: ```java newFilter = filter.withConjuncts(newConjuncts); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { Review Comment: add an annotation `@DependsRules({SubQeueryCombine.class, UncorrelatedApplyHavingProjectFilter.class})` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java: ########## @@ -19,49 +19,95 @@ import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.PlanTypeUtils; import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; /** * Subquery Expression. */ public abstract class SubqueryExpr extends Expression { + protected final LogicalPlan queryPlan; protected final List<Slot> correlateSlots; - protected final Optional<Expression> typeCoercionExpr; + protected final Optional<Expression> subQueryCombinedHavingExpr; + protected final boolean isSpj; public SubqueryExpr(LogicalPlan subquery) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.of(); this.typeCoercionExpr = Optional.empty(); + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); } public SubqueryExpr(LogicalPlan subquery, List<Slot> correlateSlots, Optional<Expression> typeCoercionExpr) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.copyOf(correlateSlots); this.typeCoercionExpr = typeCoercionExpr; + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); + } + + public SubqueryExpr(LogicalPlan subquery, List<Slot> correlateSlots, Optional<Expression> typeCoercionExpr, + Optional<Expression> subQueryCombinedHavingExpr) { + this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); + this.correlateSlots = ImmutableList.copyOf(correlateSlots); + this.typeCoercionExpr = typeCoercionExpr; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + this.isSpj = PlanTypeUtils.isSpj(queryPlan); } public List<Slot> getCorrelateSlots() { return correlateSlots; } + public List<Expression> getCorrelateExpressions() { + List<Expression> expressionList = new ArrayList<>(); + expressionList.addAll(correlateSlots); + return expressionList; Review Comment: why do copy? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SubqueryExpr.java: ########## @@ -19,49 +19,95 @@ import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.util.PlanTypeUtils; import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Optional; +import java.util.Set; /** * Subquery Expression. */ public abstract class SubqueryExpr extends Expression { + protected final LogicalPlan queryPlan; protected final List<Slot> correlateSlots; - protected final Optional<Expression> typeCoercionExpr; + protected final Optional<Expression> subQueryCombinedHavingExpr; + protected final boolean isSpj; public SubqueryExpr(LogicalPlan subquery) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.of(); this.typeCoercionExpr = Optional.empty(); + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); } public SubqueryExpr(LogicalPlan subquery, List<Slot> correlateSlots, Optional<Expression> typeCoercionExpr) { this.queryPlan = Objects.requireNonNull(subquery, "subquery can not be null"); this.correlateSlots = ImmutableList.copyOf(correlateSlots); this.typeCoercionExpr = typeCoercionExpr; + this.subQueryCombinedHavingExpr = Optional.empty(); + this.isSpj = PlanTypeUtils.isSpj(queryPlan); + } Review Comment: use `this(...)` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static boolean isSupportedPlan(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSupportedPlan(plans); + } + + public static boolean isSupportedPlan(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SUPPORTED_PLAN.stream().anyMatch(c -> c.isInstance(p))); Review Comment: ```suggestion return plans.stream().allMatch(p -> SUPPORTED_PLAN.stream().anyMatch(c -> c.isInstance(p))); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java: ########## @@ -52,15 +52,14 @@ */ public class LogicalJoin<LEFT_CHILD_TYPE extends Plan, RIGHT_CHILD_TYPE extends Plan> extends LogicalBinary<LEFT_CHILD_TYPE, RIGHT_CHILD_TYPE> implements Join { - private final JoinType joinType; private final List<Expression> otherJoinConjuncts; private final List<Expression> hashJoinConjuncts; private final JoinHint hint; // When the predicate condition contains subqueries and disjunctions, the join will be marked as MarkJoin. private final Optional<MarkJoinSlotReference> markJoinSlotReference; - + private final Optional<Expression> subQueryCombinedHavingExpr; Review Comment: add comment to explain what it is and where to use it ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static boolean isSupportedPlan(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSupportedPlan(plans); + } + + public static boolean isSupportedPlan(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SUPPORTED_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static LogicalFilter getSpjFilter(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalFilter.class::isInstance)); + return !plans.isEmpty() ? ((LogicalFilter) plans.get(0)) : null; + } + + public static boolean hasFrom(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return plans.stream().anyMatch(p -> p instanceof LogicalRelation); + } + + public static boolean hasJoin(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return plans.stream().anyMatch(p -> p instanceof LogicalJoin); + } + + public static boolean hasFilter(LogicalPlan rootPlan) { Review Comment: u could use `org.apache.doris.nereids.trees.TreeNode#containsType` ``` rootPlan.containsType(LogicalFilter.class) ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryCombine.java: ########## @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.CaseWhen; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Exists; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.WhenClause; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.SubqueryContainmentChecker; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Combine subQueries having containment relationship. + * Only support exists-notexists currently, the rest types will + * be supported in the future. + * Will put this rule into cost based transformation framework. + */ +public class SubqueryCombine extends DefaultPlanRewriter<JobContext> implements CustomRewriter { + + /** + * Types of subquery combinations. + */ + public enum CombinePattern { + EXISTS_NOTEXISTS_COMBINE, + EXISTS_EXISTS_COMBINE, + NOTEXISTS_NOTEXISTS_COMBINE, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + boolean preCheck = doPreCheck((LogicalPlan) plan); + return preCheck ? plan.accept(this, jobContext) : plan; + } + + @Override + public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, JobContext context) { + CombinePattern pattern = getCombinePattern(filter); + LogicalFilter newFilter = filter; + switch (pattern) { + case EXISTS_NOTEXISTS_COMBINE: + Set<Expression> newConjuncts = new HashSet<>(); + Set<Expression> otherConjuncts = new HashSet<>(); + SubqueryExpr existsExpr = null; + SubqueryExpr notExistsExpr = null; + SubqueryContainmentChecker checker = new SubqueryContainmentChecker(); + for (Expression expression : filter.getConjuncts()) { + if (isExists(expression)) { + existsExpr = (SubqueryExpr) expression; + } else if (isNotExists(expression)) { + notExistsExpr = (SubqueryExpr) expression; + } else { + otherConjuncts.add(expression); + } + } + boolean isValidForCombine = checkValidForSuqueryCombine(existsExpr, notExistsExpr, pattern, checker); + if (isValidForCombine) { + Exists newExists = doSubqueryCombine(existsExpr, checker); + newConjuncts.addAll(otherConjuncts); + newConjuncts.add(newExists); + newFilter = new LogicalFilter<>(newConjuncts, filter.child()); + } + break; + case EXISTS_EXISTS_COMBINE: + case NOTEXISTS_NOTEXISTS_COMBINE: + // TODO: support exists-exists and exists-notexists pattern + break; + case UNKNOWN: + default: + break; + } + return newFilter; + } + + private boolean doPreCheck(LogicalPlan plan) { + // ALL pre-checking can be put here + return checkAllTablesUnderUKModel(plan); + } + + private CombinePattern getCombinePattern(LogicalFilter filter) { + if (checkMatchExistNotExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_NOTEXISTS_COMBINE; + } else if (checkMatchExistsExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_EXISTS_COMBINE; + } else if (checkMatchNotExistsNotExists(filter.getConjuncts())) { + return CombinePattern.NOTEXISTS_NOTEXISTS_COMBINE; + } else { + return CombinePattern.UNKNOWN; + } + } + + /** + * Check all plan accessed tables are all in unique key model. + */ + private boolean checkAllTablesUnderUKModel(LogicalPlan plan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(plan.collect(LogicalPlan.class::isInstance)); Review Comment: just collect LogicalRelation ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); Review Comment: the code is not easy for read, we could use a constructor with no paramters and init all attribute in itself ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SPJ_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static boolean isSupportedPlan(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSupportedPlan(plans); + } + + public static boolean isSupportedPlan(List<LogicalPlan> plans) { + return plans.stream().anyMatch(p -> SUPPORTED_PLAN.stream().anyMatch(c -> c.isInstance(p))); + } + + public static LogicalFilter getSpjFilter(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalFilter.class::isInstance)); Review Comment: if spj tree include join, then we could have more than one filter, so why only return the first filter? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/UnCorrelatedApplyHavingProjectFilter.java: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalApply; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.PlanUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Raise the correlated predicate in filter and having predicate into apply + * before: + * apply + * / \ + * Input(output:b) Having(predicate) + * | + * Project(output:a) + * | + * Filter(Correlated predicate/UnCorrelated predicate) + * | + * child + * + * after: + * apply(Correlated predicate/Having predicate) + * / \ + * Input(output:b) Project(output:a) + * | + * Filter(UnCorrelated predicate) + * | + * child + */ +public class UnCorrelatedApplyHavingProjectFilter extends OneRewriteRuleFactory { + @Override + public Rule build() { + return logicalApply(any(), logicalHaving(logicalProject(logicalFilter()))) Review Comment: add an annotation `@DependsRules({SubqueryCombine.class})` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalApply.java: ########## @@ -52,6 +52,7 @@ private final SubqueryExpr subqueryExpr; // correlation Conjunction private final Optional<Expression> correlationFilter; + private Optional<Expression> subQueryCombinedHavingExpr; Review Comment: final ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryCombine.java: ########## @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.CaseWhen; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Exists; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.WhenClause; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.SubqueryContainmentChecker; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Combine subQueries having containment relationship. + * Only support exists-notexists currently, the rest types will + * be supported in the future. + * Will put this rule into cost based transformation framework. + */ +public class SubqueryCombine extends DefaultPlanRewriter<JobContext> implements CustomRewriter { + + /** + * Types of subquery combinations. + */ + public enum CombinePattern { + EXISTS_NOTEXISTS_COMBINE, + EXISTS_EXISTS_COMBINE, + NOTEXISTS_NOTEXISTS_COMBINE, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + boolean preCheck = doPreCheck((LogicalPlan) plan); + return preCheck ? plan.accept(this, jobContext) : plan; + } + + @Override + public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, JobContext context) { + CombinePattern pattern = getCombinePattern(filter); + LogicalFilter newFilter = filter; + switch (pattern) { + case EXISTS_NOTEXISTS_COMBINE: + Set<Expression> newConjuncts = new HashSet<>(); + Set<Expression> otherConjuncts = new HashSet<>(); + SubqueryExpr existsExpr = null; + SubqueryExpr notExistsExpr = null; + SubqueryContainmentChecker checker = new SubqueryContainmentChecker(); + for (Expression expression : filter.getConjuncts()) { + if (isExists(expression)) { + existsExpr = (SubqueryExpr) expression; + } else if (isNotExists(expression)) { + notExistsExpr = (SubqueryExpr) expression; + } else { + otherConjuncts.add(expression); + } + } + boolean isValidForCombine = checkValidForSuqueryCombine(existsExpr, notExistsExpr, pattern, checker); + if (isValidForCombine) { + Exists newExists = doSubqueryCombine(existsExpr, checker); + newConjuncts.addAll(otherConjuncts); + newConjuncts.add(newExists); + newFilter = new LogicalFilter<>(newConjuncts, filter.child()); + } + break; + case EXISTS_EXISTS_COMBINE: + case NOTEXISTS_NOTEXISTS_COMBINE: + // TODO: support exists-exists and exists-notexists pattern + break; + case UNKNOWN: + default: + break; + } + return newFilter; + } + + private boolean doPreCheck(LogicalPlan plan) { + // ALL pre-checking can be put here + return checkAllTablesUnderUKModel(plan); + } + + private CombinePattern getCombinePattern(LogicalFilter filter) { + if (checkMatchExistNotExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_NOTEXISTS_COMBINE; + } else if (checkMatchExistsExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_EXISTS_COMBINE; + } else if (checkMatchNotExistsNotExists(filter.getConjuncts())) { + return CombinePattern.NOTEXISTS_NOTEXISTS_COMBINE; + } else { + return CombinePattern.UNKNOWN; + } + } + + /** + * Check all plan accessed tables are all in unique key model. + */ + private boolean checkAllTablesUnderUKModel(LogicalPlan plan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(plan.collect(LogicalPlan.class::isInstance)); + List<LogicalRelation> tables = plans.stream().filter(LogicalRelation.class::isInstance) + .map(LogicalRelation.class::cast) + .collect(Collectors.toList()); + return tables.stream().filter(LogicalOlapScan.class::isInstance) + .allMatch(f -> ((LogicalOlapScan) f).getTable().getKeysType() == KeysType.UNIQUE_KEYS + && ((LogicalOlapScan) f).getTable().getKeysNum() != 0); Review Comment: i think could not do `filter(LogicalOlapScan.class::isInstance)`. should return false if any relation is not LogicalOlapScan ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); + } + + @Override + public Plan visit(Plan plan, SemiToInnerContext context) { + plan = super.visit(plan, context); + if (context.visited) { + if (context.pattern == TransformPattern.INNER_GBY) { + return transformToInnerGby((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.INNER) { + return transformToInner((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.GBY_INNER) { + return transformToGbyInner((LogicalPlan) plan, context); + } else { + return plan; + } + } else { + return plan; + } + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, SemiToInnerContext context) { + if (join.getJoinType() == JoinType.LEFT_SEMI_JOIN) { + context.pattern = getTransformPattern(join); + if (context.pattern == TransformPattern.INNER_GBY) { + context.visited = true; + context.subQueryCombinedHavingExpr = join.getSubQueryCombinedHavingExpr(); + context.groupByExpressions = getAllUniqueColumnsInJoin((LogicalPlan) join.left()); + return new LogicalJoin<>(JoinType.INNER_JOIN, join.getHashJoinConjuncts(), + join.getOtherJoinConjuncts(), + JoinHint.NONE, + join.getMarkJoinSlotReference(), + Optional.empty(), + (LogicalPlan) join.left(), (LogicalPlan) join.right()); + } else { + return join; + } + } else { + return join; + } + } + + /** + * Semi join to inner transformation context + */ + public static class SemiToInnerContext { + boolean visited; + TransformPattern pattern; + Set<Slot> outerDependantSlots; + List<Expression> groupByExpressions; + Optional<Expression> subQueryCombinedHavingExpr; + + public SemiToInnerContext(boolean visited, + TransformPattern pattern, + Set<Slot> outerDependantSlots, + List<Expression> groupByExpressions, + Optional<Expression> subQueryCombinedHavingExpr) { + this.visited = visited; + this.pattern = pattern; + this.outerDependantSlots = outerDependantSlots; + this.groupByExpressions = groupByExpressions; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + } + } + + private TransformPattern getTransformPattern(LogicalJoin join) { + // only support inner gby pattern currently. + if (join.getSubQueryCombinedHavingExpr().isPresent()) { + return TransformPattern.INNER_GBY; + } else { + return TransformPattern.UNKNOWN; + } + } + + private LogicalPlan transformToInnerGby(LogicalPlan plan, SemiToInnerContext context) { + if (!(plan instanceof LogicalProject || plan instanceof LogicalFilter)) { Review Comment: ```java if (plan instanceof LogicalProject || plan instanceof LogicalFilter) { return plan; } ... ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); + } + + @Override + public Plan visit(Plan plan, SemiToInnerContext context) { + plan = super.visit(plan, context); + if (context.visited) { + if (context.pattern == TransformPattern.INNER_GBY) { + return transformToInnerGby((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.INNER) { + return transformToInner((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.GBY_INNER) { + return transformToGbyInner((LogicalPlan) plan, context); + } else { + return plan; + } + } else { + return plan; + } + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, SemiToInnerContext context) { + if (join.getJoinType() == JoinType.LEFT_SEMI_JOIN) { + context.pattern = getTransformPattern(join); + if (context.pattern == TransformPattern.INNER_GBY) { + context.visited = true; + context.subQueryCombinedHavingExpr = join.getSubQueryCombinedHavingExpr(); + context.groupByExpressions = getAllUniqueColumnsInJoin((LogicalPlan) join.left()); + return new LogicalJoin<>(JoinType.INNER_JOIN, join.getHashJoinConjuncts(), + join.getOtherJoinConjuncts(), + JoinHint.NONE, + join.getMarkJoinSlotReference(), + Optional.empty(), + (LogicalPlan) join.left(), (LogicalPlan) join.right()); + } else { + return join; + } + } else { + return join; + } + } + + /** + * Semi join to inner transformation context + */ + public static class SemiToInnerContext { + boolean visited; + TransformPattern pattern; + Set<Slot> outerDependantSlots; + List<Expression> groupByExpressions; + Optional<Expression> subQueryCombinedHavingExpr; + + public SemiToInnerContext(boolean visited, + TransformPattern pattern, + Set<Slot> outerDependantSlots, + List<Expression> groupByExpressions, + Optional<Expression> subQueryCombinedHavingExpr) { + this.visited = visited; + this.pattern = pattern; + this.outerDependantSlots = outerDependantSlots; + this.groupByExpressions = groupByExpressions; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + } + } + + private TransformPattern getTransformPattern(LogicalJoin join) { + // only support inner gby pattern currently. + if (join.getSubQueryCombinedHavingExpr().isPresent()) { + return TransformPattern.INNER_GBY; + } else { + return TransformPattern.UNKNOWN; + } + } + + private LogicalPlan transformToInnerGby(LogicalPlan plan, SemiToInnerContext context) { + if (!(plan instanceof LogicalProject || plan instanceof LogicalFilter)) { + LogicalPlan topOperator = plan; + LogicalPlan childOperator = (LogicalPlan) topOperator.child(0); + context.outerDependantSlots = topOperator.getInputSlots(); + + LogicalProject project; + if (childOperator instanceof LogicalProject) { + // merge project has been applied + project = (LogicalProject) childOperator; + LogicalFilter filter = (LogicalFilter) project.child(0); + project = new LogicalProject(filter.getOutput(), filter); + } else { + LogicalFilter filter = (LogicalFilter) childOperator; + project = new LogicalProject(filter.getOutput(), filter); + } + // build aggr on new project + Set<Expression> combinedGbyExprs = new HashSet<>(); + Set<Expression> outputExprsAfterGby = new HashSet<>(); + // group by item exprs + combinedGbyExprs.addAll(context.groupByExpressions); + combinedGbyExprs.addAll(context.outerDependantSlots); + + // build (sum(...) as alias) expr + Expression aggrExprFromHavingExpr = extractAggrExprFromHavingExpr(context.subQueryCombinedHavingExpr.get()); + Alias aggrExprAlias = new Alias(aggrExprFromHavingExpr, aggrExprFromHavingExpr.toSql()); + // group by output use alias expr + outputExprsAfterGby.addAll(combinedGbyExprs); + outputExprsAfterGby.add(aggrExprAlias); + + Set<Expression> havingExprAliasList = new HashSet<>(); + // having filter use alias = 0 + EqualTo havingExpr = new EqualTo(aggrExprAlias.toSlot(), new BigIntLiteral(0)); + havingExprAliasList.add(havingExpr); + + LogicalAggregate newAggr = new LogicalAggregate(new ArrayList<>(combinedGbyExprs), + new ArrayList<>(outputExprsAfterGby), project); Review Comment: ```suggestion LogicalAggregate newAggr = new LogicalAggregate(ImmutableList.copyOf(combinedGbyExprs), ImmutableList.copyOf(outputExprsAfterGby), project); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); + } + + @Override + public Plan visit(Plan plan, SemiToInnerContext context) { + plan = super.visit(plan, context); + if (context.visited) { + if (context.pattern == TransformPattern.INNER_GBY) { + return transformToInnerGby((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.INNER) { + return transformToInner((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.GBY_INNER) { + return transformToGbyInner((LogicalPlan) plan, context); + } else { + return plan; + } + } else { + return plan; + } + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, SemiToInnerContext context) { + if (join.getJoinType() == JoinType.LEFT_SEMI_JOIN) { + context.pattern = getTransformPattern(join); + if (context.pattern == TransformPattern.INNER_GBY) { + context.visited = true; + context.subQueryCombinedHavingExpr = join.getSubQueryCombinedHavingExpr(); + context.groupByExpressions = getAllUniqueColumnsInJoin((LogicalPlan) join.left()); + return new LogicalJoin<>(JoinType.INNER_JOIN, join.getHashJoinConjuncts(), + join.getOtherJoinConjuncts(), + JoinHint.NONE, + join.getMarkJoinSlotReference(), + Optional.empty(), + (LogicalPlan) join.left(), (LogicalPlan) join.right()); + } else { + return join; + } + } else { + return join; + } + } + + /** + * Semi join to inner transformation context + */ + public static class SemiToInnerContext { + boolean visited; + TransformPattern pattern; + Set<Slot> outerDependantSlots; + List<Expression> groupByExpressions; + Optional<Expression> subQueryCombinedHavingExpr; + + public SemiToInnerContext(boolean visited, + TransformPattern pattern, + Set<Slot> outerDependantSlots, + List<Expression> groupByExpressions, + Optional<Expression> subQueryCombinedHavingExpr) { + this.visited = visited; + this.pattern = pattern; + this.outerDependantSlots = outerDependantSlots; + this.groupByExpressions = groupByExpressions; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + } + } + + private TransformPattern getTransformPattern(LogicalJoin join) { + // only support inner gby pattern currently. + if (join.getSubQueryCombinedHavingExpr().isPresent()) { + return TransformPattern.INNER_GBY; + } else { + return TransformPattern.UNKNOWN; + } + } + + private LogicalPlan transformToInnerGby(LogicalPlan plan, SemiToInnerContext context) { + if (!(plan instanceof LogicalProject || plan instanceof LogicalFilter)) { + LogicalPlan topOperator = plan; + LogicalPlan childOperator = (LogicalPlan) topOperator.child(0); + context.outerDependantSlots = topOperator.getInputSlots(); + + LogicalProject project; + if (childOperator instanceof LogicalProject) { + // merge project has been applied + project = (LogicalProject) childOperator; + LogicalFilter filter = (LogicalFilter) project.child(0); + project = new LogicalProject(filter.getOutput(), filter); + } else { + LogicalFilter filter = (LogicalFilter) childOperator; + project = new LogicalProject(filter.getOutput(), filter); + } + // build aggr on new project + Set<Expression> combinedGbyExprs = new HashSet<>(); + Set<Expression> outputExprsAfterGby = new HashSet<>(); + // group by item exprs + combinedGbyExprs.addAll(context.groupByExpressions); + combinedGbyExprs.addAll(context.outerDependantSlots); + + // build (sum(...) as alias) expr + Expression aggrExprFromHavingExpr = extractAggrExprFromHavingExpr(context.subQueryCombinedHavingExpr.get()); + Alias aggrExprAlias = new Alias(aggrExprFromHavingExpr, aggrExprFromHavingExpr.toSql()); + // group by output use alias expr + outputExprsAfterGby.addAll(combinedGbyExprs); + outputExprsAfterGby.add(aggrExprAlias); + + Set<Expression> havingExprAliasList = new HashSet<>(); + // having filter use alias = 0 + EqualTo havingExpr = new EqualTo(aggrExprAlias.toSlot(), new BigIntLiteral(0)); + havingExprAliasList.add(havingExpr); + + LogicalAggregate newAggr = new LogicalAggregate(new ArrayList<>(combinedGbyExprs), + new ArrayList<>(outputExprsAfterGby), project); + LogicalFilter havingFilter = new LogicalFilter(havingExprAliasList, newAggr); + LogicalProject newProject = new LogicalProject(new ArrayList<>(combinedGbyExprs), havingFilter); Review Comment: ```suggestion LogicalProject newProject = new LogicalProject(ImmutableList.copyOf(combinedGbyExprs), havingFilter); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); + } + + @Override + public Plan visit(Plan plan, SemiToInnerContext context) { + plan = super.visit(plan, context); + if (context.visited) { + if (context.pattern == TransformPattern.INNER_GBY) { + return transformToInnerGby((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.INNER) { + return transformToInner((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.GBY_INNER) { + return transformToGbyInner((LogicalPlan) plan, context); + } else { + return plan; + } + } else { + return plan; + } + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, SemiToInnerContext context) { + if (join.getJoinType() == JoinType.LEFT_SEMI_JOIN) { + context.pattern = getTransformPattern(join); + if (context.pattern == TransformPattern.INNER_GBY) { + context.visited = true; + context.subQueryCombinedHavingExpr = join.getSubQueryCombinedHavingExpr(); + context.groupByExpressions = getAllUniqueColumnsInJoin((LogicalPlan) join.left()); + return new LogicalJoin<>(JoinType.INNER_JOIN, join.getHashJoinConjuncts(), + join.getOtherJoinConjuncts(), + JoinHint.NONE, + join.getMarkJoinSlotReference(), + Optional.empty(), + (LogicalPlan) join.left(), (LogicalPlan) join.right()); + } else { + return join; + } + } else { + return join; + } + } + + /** + * Semi join to inner transformation context + */ + public static class SemiToInnerContext { + boolean visited; + TransformPattern pattern; + Set<Slot> outerDependantSlots; + List<Expression> groupByExpressions; + Optional<Expression> subQueryCombinedHavingExpr; + + public SemiToInnerContext(boolean visited, + TransformPattern pattern, + Set<Slot> outerDependantSlots, + List<Expression> groupByExpressions, + Optional<Expression> subQueryCombinedHavingExpr) { + this.visited = visited; + this.pattern = pattern; + this.outerDependantSlots = outerDependantSlots; + this.groupByExpressions = groupByExpressions; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + } + } + + private TransformPattern getTransformPattern(LogicalJoin join) { + // only support inner gby pattern currently. + if (join.getSubQueryCombinedHavingExpr().isPresent()) { + return TransformPattern.INNER_GBY; + } else { + return TransformPattern.UNKNOWN; + } + } + + private LogicalPlan transformToInnerGby(LogicalPlan plan, SemiToInnerContext context) { + if (!(plan instanceof LogicalProject || plan instanceof LogicalFilter)) { + LogicalPlan topOperator = plan; + LogicalPlan childOperator = (LogicalPlan) topOperator.child(0); + context.outerDependantSlots = topOperator.getInputSlots(); + + LogicalProject project; + if (childOperator instanceof LogicalProject) { + // merge project has been applied + project = (LogicalProject) childOperator; + LogicalFilter filter = (LogicalFilter) project.child(0); + project = new LogicalProject(filter.getOutput(), filter); + } else { + LogicalFilter filter = (LogicalFilter) childOperator; + project = new LogicalProject(filter.getOutput(), filter); + } + // build aggr on new project + Set<Expression> combinedGbyExprs = new HashSet<>(); + Set<Expression> outputExprsAfterGby = new HashSet<>(); + // group by item exprs + combinedGbyExprs.addAll(context.groupByExpressions); + combinedGbyExprs.addAll(context.outerDependantSlots); + + // build (sum(...) as alias) expr + Expression aggrExprFromHavingExpr = extractAggrExprFromHavingExpr(context.subQueryCombinedHavingExpr.get()); + Alias aggrExprAlias = new Alias(aggrExprFromHavingExpr, aggrExprFromHavingExpr.toSql()); + // group by output use alias expr + outputExprsAfterGby.addAll(combinedGbyExprs); + outputExprsAfterGby.add(aggrExprAlias); + + Set<Expression> havingExprAliasList = new HashSet<>(); + // having filter use alias = 0 + EqualTo havingExpr = new EqualTo(aggrExprAlias.toSlot(), new BigIntLiteral(0)); + havingExprAliasList.add(havingExpr); + + LogicalAggregate newAggr = new LogicalAggregate(new ArrayList<>(combinedGbyExprs), + new ArrayList<>(outputExprsAfterGby), project); + LogicalFilter havingFilter = new LogicalFilter(havingExprAliasList, newAggr); + LogicalProject newProject = new LogicalProject(new ArrayList<>(combinedGbyExprs), havingFilter); + context.visited = false; + + return (LogicalPlan) topOperator.withChildren(newProject); + } else { + return plan; + } + } + + private LogicalPlan transformToInner(LogicalPlan plan, SemiToInnerContext context) { + return plan; + } + + private LogicalPlan transformToGbyInner(LogicalPlan plan, SemiToInnerContext context) { + return plan; + } Review Comment: add TODO ########## fe/fe-core/src/main/java/org/apache/doris/nereids/util/PlanTypeUtils.java: ########## @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.util; + +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Set; + +/** + * Judgment for plan tree pattern, such as spj plan, etc. + */ +public class PlanTypeUtils { + + private static final Set<Class<? extends LogicalPlan>> SPJ_PLAN = ImmutableSet.of( + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalSubQueryAlias.class // FIXME + ); + + private static final Set<Class<? extends LogicalPlan>> SUPPORTED_PLAN = ImmutableSet.of( + // TODO: Set related ops + LogicalRelation.class, + LogicalJoin.class, + LogicalFilter.class, + LogicalProject.class, + LogicalWindow.class, + LogicalAggregate.class, + LogicalHaving.class, + LogicalSort.class, + LogicalLimit.class, + LogicalSubQueryAlias.class + ); + + public static boolean isSpj(LogicalPlan rootPlan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(rootPlan.collect(LogicalPlan.class::isInstance)); + return isSpj(plans); + } + + public static boolean isSpj(List<LogicalPlan> plans) { Review Comment: add an allMatch function to TreeNode to avoid collect all plan objects into a list ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryCombine.java: ########## @@ -0,0 +1,253 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.catalog.KeysType; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.trees.expressions.CaseWhen; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Exists; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.WhenClause; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.SubqueryContainmentChecker; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.Utils; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Combine subQueries having containment relationship. + * Only support exists-notexists currently, the rest types will + * be supported in the future. + * Will put this rule into cost based transformation framework. + */ +public class SubqueryCombine extends DefaultPlanRewriter<JobContext> implements CustomRewriter { + + /** + * Types of subquery combinations. + */ + public enum CombinePattern { + EXISTS_NOTEXISTS_COMBINE, + EXISTS_EXISTS_COMBINE, + NOTEXISTS_NOTEXISTS_COMBINE, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + boolean preCheck = doPreCheck((LogicalPlan) plan); + return preCheck ? plan.accept(this, jobContext) : plan; + } + + @Override + public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, JobContext context) { + CombinePattern pattern = getCombinePattern(filter); + LogicalFilter newFilter = filter; + switch (pattern) { + case EXISTS_NOTEXISTS_COMBINE: + Set<Expression> newConjuncts = new HashSet<>(); + Set<Expression> otherConjuncts = new HashSet<>(); + SubqueryExpr existsExpr = null; + SubqueryExpr notExistsExpr = null; + SubqueryContainmentChecker checker = new SubqueryContainmentChecker(); + for (Expression expression : filter.getConjuncts()) { + if (isExists(expression)) { + existsExpr = (SubqueryExpr) expression; + } else if (isNotExists(expression)) { + notExistsExpr = (SubqueryExpr) expression; + } else { + otherConjuncts.add(expression); + } + } + boolean isValidForCombine = checkValidForSuqueryCombine(existsExpr, notExistsExpr, pattern, checker); + if (isValidForCombine) { + Exists newExists = doSubqueryCombine(existsExpr, checker); + newConjuncts.addAll(otherConjuncts); + newConjuncts.add(newExists); + newFilter = new LogicalFilter<>(newConjuncts, filter.child()); + } + break; + case EXISTS_EXISTS_COMBINE: + case NOTEXISTS_NOTEXISTS_COMBINE: + // TODO: support exists-exists and exists-notexists pattern + break; + case UNKNOWN: + default: + break; + } + return newFilter; + } + + private boolean doPreCheck(LogicalPlan plan) { + // ALL pre-checking can be put here + return checkAllTablesUnderUKModel(plan); + } + + private CombinePattern getCombinePattern(LogicalFilter filter) { + if (checkMatchExistNotExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_NOTEXISTS_COMBINE; + } else if (checkMatchExistsExists(filter.getConjuncts())) { + return CombinePattern.EXISTS_EXISTS_COMBINE; + } else if (checkMatchNotExistsNotExists(filter.getConjuncts())) { + return CombinePattern.NOTEXISTS_NOTEXISTS_COMBINE; + } else { + return CombinePattern.UNKNOWN; + } + } + + /** + * Check all plan accessed tables are all in unique key model. + */ + private boolean checkAllTablesUnderUKModel(LogicalPlan plan) { + List<LogicalPlan> plans = Lists.newArrayList(); + plans.addAll(plan.collect(LogicalPlan.class::isInstance)); + List<LogicalRelation> tables = plans.stream().filter(LogicalRelation.class::isInstance) + .map(LogicalRelation.class::cast) + .collect(Collectors.toList()); + return tables.stream().filter(LogicalOlapScan.class::isInstance) + .allMatch(f -> ((LogicalOlapScan) f).getTable().getKeysType() == KeysType.UNIQUE_KEYS + && ((LogicalOlapScan) f).getTable().getKeysNum() != 0); + } + + private boolean checkValidForSuqueryCombine(SubqueryExpr existsExpr, SubqueryExpr notExistsExpr, + CombinePattern pattern, SubqueryContainmentChecker checker) { + if (pattern == CombinePattern.EXISTS_NOTEXISTS_COMBINE) { + // spj checking + boolean existIsSpj = existsExpr.isSpj(); + boolean notExistIsSpj = notExistsExpr.isSpj(); + if (!existIsSpj || !notExistIsSpj) { + return false; + } + Set<Expression> existsConjuncts = existsExpr.getSpjPredicate(); + Set<Expression> notExistsConjuncts = notExistsExpr.getSpjPredicate(); + if (existsConjuncts.isEmpty() || notExistsConjuncts.isEmpty()) { + return false; + } + // check correlated filter to make sure semi-inner transformation goes inner-gby pattern + Map<Boolean, List<Expression>> existsSplit = Utils.splitCorrelatedConjuncts( + existsConjuncts, existsExpr.getCorrelateExpressions()); + Map<Boolean, List<Expression>> notExistsSplit = Utils.splitCorrelatedConjuncts( + notExistsConjuncts, notExistsExpr.getCorrelateExpressions()); + if (!existsExpr.getCorrelateSlots().equals(notExistsExpr.getCorrelateSlots())) { + return false; + } + List<Expression> existsCorrelatedPredicate = existsSplit.get(true); + List<Expression> notExistsCorrelatedPredicate = notExistsSplit.get(true); + boolean existsConNonEqual = checkContainNonEqualCondition(existsCorrelatedPredicate); + boolean notExistsConNonEqual = checkContainNonEqualCondition(notExistsCorrelatedPredicate); + if (!existsConNonEqual || !notExistsConNonEqual) { + return false; + } else { + return checker.check(existsExpr, notExistsExpr, true); + } + } else if (pattern == CombinePattern.EXISTS_EXISTS_COMBINE) { + return false; + } else if (pattern == CombinePattern.NOTEXISTS_NOTEXISTS_COMBINE) { + return false; + } else { + return false; + } + } + + private boolean checkContainNonEqualCondition(List<Expression> predicateList) { + boolean containNonEqualCondition = false; + for (Expression expr : predicateList) { + if (!(expr instanceof EqualTo)) { + containNonEqualCondition = true; + break; + } + } + return containNonEqualCondition; + } + + private Expression getExceptConditions(SubqueryContainmentChecker checker) { + // column has been replaced based on the column-mapping + List<Expression> exceptConditions = checker.getExceptConditions(); + return ExpressionUtils.and(exceptConditions); + } + + private Expression buildCaseWhenExpr(Expression filter) { + List<WhenClause> whenClauses = new ArrayList<>(); + WhenClause whenClause = new WhenClause(filter, new TinyIntLiteral((byte) 1)); + Expression defaultOperand = new TinyIntLiteral((byte) 0); + whenClauses.add(whenClause); + return new CaseWhen(whenClauses, defaultOperand); + } + + private Exists doSubqueryCombine(SubqueryExpr existsExpr, SubqueryContainmentChecker checker) { + Expression exceptFilters = getExceptConditions(checker); + // build having (sum(case when except_filters then 1 else 0 end) = 0) + Expression caseWhen = buildCaseWhenExpr(exceptFilters); + Expression sumEqualsZero = new EqualTo(new Sum(caseWhen), new BigIntLiteral(0)); + LogicalPlan oldPlan = existsExpr.getQueryPlan(); + LogicalHaving havingPlan = new LogicalHaving(ImmutableSet.of(sumEqualsZero), oldPlan); + return new Exists(havingPlan, existsExpr.getCorrelateSlots(), existsExpr.getTypeCoercionExpr(), + ExpressionUtils.optionalAnd(sumEqualsZero), false); + } + + private boolean isExists(Expression expr) { + return expr instanceof Exists && !((Exists) expr).isNot(); + } + + private boolean isNotExists(Expression expr) { + return expr instanceof Exists && ((Exists) expr).isNot(); + } + + private boolean checkMatchExistNotExists(Set<Expression> conjuncts) { + Set<Exists> existsSet = new HashSet<>(); + Set<Exists> notExistsSet = new HashSet<>(); + for (Expression expr : conjuncts) { + if (isExists(expr)) { + existsSet.add((Exists) expr); + } else if (isNotExists(expr)) { + notExistsSet.add((Exists) expr); + } + } + return existsSet.size() == 1 && notExistsSet.size() == 1; + } + + private boolean checkMatchExistsExists(Set<Expression> conjuncts) { + return false; Review Comment: add TODO ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/logical/SemiToInner.java: ########## @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite.logical; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.nereids.exceptions.TransformException; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.rewrite.logical.SemiToInner.SemiToInnerContext; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinHint; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * Transform semi join to inner join. + * TODO: only support semi -> inner + gby pattern currently + * Will support pure inner and gby + inner patterns. + * Will put into cost based transformation framework. + */ +public class SemiToInner extends DefaultPlanRewriter<SemiToInnerContext> implements CustomRewriter { + + /** + * Types of semi join to inner join pattern. + */ + public enum TransformPattern { + INNER, + GBY_INNER, + INNER_GBY, + UNKNOWN + } + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + return plan.accept(this, new SemiToInnerContext(false, TransformPattern.UNKNOWN, + new HashSet<>(), new ArrayList<>(), + Optional.empty())); + } + + @Override + public Plan visit(Plan plan, SemiToInnerContext context) { + plan = super.visit(plan, context); + if (context.visited) { + if (context.pattern == TransformPattern.INNER_GBY) { + return transformToInnerGby((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.INNER) { + return transformToInner((LogicalPlan) plan, context); + } else if (context.pattern == TransformPattern.GBY_INNER) { + return transformToGbyInner((LogicalPlan) plan, context); + } else { + return plan; + } + } else { + return plan; + } + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, SemiToInnerContext context) { + if (join.getJoinType() == JoinType.LEFT_SEMI_JOIN) { + context.pattern = getTransformPattern(join); + if (context.pattern == TransformPattern.INNER_GBY) { + context.visited = true; + context.subQueryCombinedHavingExpr = join.getSubQueryCombinedHavingExpr(); + context.groupByExpressions = getAllUniqueColumnsInJoin((LogicalPlan) join.left()); + return new LogicalJoin<>(JoinType.INNER_JOIN, join.getHashJoinConjuncts(), + join.getOtherJoinConjuncts(), + JoinHint.NONE, + join.getMarkJoinSlotReference(), + Optional.empty(), + (LogicalPlan) join.left(), (LogicalPlan) join.right()); + } else { + return join; + } + } else { + return join; + } + } + + /** + * Semi join to inner transformation context + */ + public static class SemiToInnerContext { + boolean visited; + TransformPattern pattern; + Set<Slot> outerDependantSlots; + List<Expression> groupByExpressions; + Optional<Expression> subQueryCombinedHavingExpr; + + public SemiToInnerContext(boolean visited, + TransformPattern pattern, + Set<Slot> outerDependantSlots, + List<Expression> groupByExpressions, + Optional<Expression> subQueryCombinedHavingExpr) { + this.visited = visited; + this.pattern = pattern; + this.outerDependantSlots = outerDependantSlots; + this.groupByExpressions = groupByExpressions; + this.subQueryCombinedHavingExpr = subQueryCombinedHavingExpr; + } + } + + private TransformPattern getTransformPattern(LogicalJoin join) { + // only support inner gby pattern currently. + if (join.getSubQueryCombinedHavingExpr().isPresent()) { + return TransformPattern.INNER_GBY; + } else { + return TransformPattern.UNKNOWN; + } + } + + private LogicalPlan transformToInnerGby(LogicalPlan plan, SemiToInnerContext context) { + if (!(plan instanceof LogicalProject || plan instanceof LogicalFilter)) { + LogicalPlan topOperator = plan; + LogicalPlan childOperator = (LogicalPlan) topOperator.child(0); + context.outerDependantSlots = topOperator.getInputSlots(); + + LogicalProject project; + if (childOperator instanceof LogicalProject) { + // merge project has been applied + project = (LogicalProject) childOperator; + LogicalFilter filter = (LogicalFilter) project.child(0); + project = new LogicalProject(filter.getOutput(), filter); + } else { + LogicalFilter filter = (LogicalFilter) childOperator; + project = new LogicalProject(filter.getOutput(), filter); + } + // build aggr on new project + Set<Expression> combinedGbyExprs = new HashSet<>(); + Set<Expression> outputExprsAfterGby = new HashSet<>(); + // group by item exprs + combinedGbyExprs.addAll(context.groupByExpressions); + combinedGbyExprs.addAll(context.outerDependantSlots); + + // build (sum(...) as alias) expr + Expression aggrExprFromHavingExpr = extractAggrExprFromHavingExpr(context.subQueryCombinedHavingExpr.get()); + Alias aggrExprAlias = new Alias(aggrExprFromHavingExpr, aggrExprFromHavingExpr.toSql()); + // group by output use alias expr + outputExprsAfterGby.addAll(combinedGbyExprs); + outputExprsAfterGby.add(aggrExprAlias); + + Set<Expression> havingExprAliasList = new HashSet<>(); + // having filter use alias = 0 + EqualTo havingExpr = new EqualTo(aggrExprAlias.toSlot(), new BigIntLiteral(0)); + havingExprAliasList.add(havingExpr); Review Comment: ```suggestion // having filter use alias = 0 EqualTo havingExpr = new EqualTo(aggrExprAlias.toSlot(), new BigIntLiteral(0)); Set<Expression> havingExprAliasList = ImmutableSet.of(havingExpr); ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/SubqueryContainmentChecker.java: ########## @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.visitor; + +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; +import org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.nereids.util.PlanTypeUtils; +import org.apache.doris.nereids.util.PlanUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Judgment for the containment between subqueries. + * TODO: enhance the plan type support. + */ +public class SubqueryContainmentChecker { + + private final List<LogicalPlan> selfPlans = Lists.newArrayList(); + private final List<LogicalPlan> otherPlans = Lists.newArrayList(); + private final List<Expression> exceptConditions = Lists.newArrayList(); + private LogicalPlan selfTopLogicalPlan = null; + private LogicalPlan otherTopLogicalPlan = null; + private boolean isForSpj = false; + private Map<Expression, Expression> otherToSelfSlotMap = Maps.newHashMap(); + + /** + * Check whether self is contained by other subquery. + */ + public boolean check(SubqueryExpr self, SubqueryExpr other, boolean isForSpj) { + init(self, other, isForSpj); + if (!(selfTopLogicalPlan instanceof LogicalProject) || !(otherTopLogicalPlan instanceof LogicalProject)) { + return false; + } else if (isForSpj) { + return checkPlanType() + && checkFromList() + && checkJoinType() + && checkConditions(); + } else { + return checkPlanType() + && checkFromList() + && checkJoinType() + && checkConditions() + && checkGroupBy() + && checkHaving() + && checkWindowFunctions() + && checkSelectItems() + && checkDistinct() + && checkLimit(); + } + } + + private void init(SubqueryExpr self, SubqueryExpr other, boolean isForSpj) { + this.selfPlans.addAll(self.getQueryPlan().collect(LogicalPlan.class::isInstance)); + this.otherPlans.addAll(other.getQueryPlan().collect(LogicalPlan.class::isInstance)); + this.selfTopLogicalPlan = self.getQueryPlan(); + this.otherTopLogicalPlan = other.getQueryPlan(); + this.isForSpj = isForSpj; + } + + /** + * Return except conditions if containment checking succeed. + */ + public List<Expression> getExceptConditions() { + return exceptConditions; + } + + private boolean checkPlanType() { + return this.isForSpj ? PlanTypeUtils.isSpj(selfPlans) && PlanTypeUtils.isSpj(otherPlans) : + PlanTypeUtils.isSupportedPlan(selfPlans) && PlanTypeUtils.isSupportedPlan(otherPlans); + } + + private boolean checkFromList() { + // TODO: only basic table, exclude subquery + List<LogicalRelation> selfTables = selfPlans.stream().filter(LogicalRelation.class::isInstance) + .map(LogicalRelation.class::cast) + .collect(Collectors.toList()); + List<LogicalRelation> otherTables = otherPlans.stream().filter(LogicalRelation.class::isInstance) + .map(LogicalRelation.class::cast) + .collect(Collectors.toList()); + List<Long> selfIds = selfTables.stream().map(node -> node.getTable().getId()).collect(Collectors.toList()); + List<Long> otherIds = otherTables.stream().map(node -> node.getTable().getId()).collect(Collectors.toList()); + if (Sets.newHashSet(selfIds).size() != selfIds.size() || Sets.newHashSet(otherIds).size() != otherIds.size()) { + return false; + } else if (selfIds.size() != otherIds.size()) { + return false; + } else { + otherToSelfSlotMap = PlanUtils.createSlotMapping(otherTables, selfTables); + return selfIds.stream().allMatch(e -> otherIds.contains(e)); + } + } + + private boolean checkJoinType() { + // TODO: support other join type + return selfPlans.stream() + .filter(LogicalJoin.class::isInstance) + .map(p -> (LogicalJoin<Plan, Plan>) p) + .allMatch(e -> (e.getJoinType() == JoinType.INNER_JOIN && e.getOnClauseCondition().isPresent())) + && otherPlans.stream() + .filter(LogicalJoin.class::isInstance) + .map(p -> (LogicalJoin<Plan, Plan>) p) + .allMatch(e -> (e.getJoinType() == JoinType.INNER_JOIN && e.getOnClauseCondition().isPresent())); + } + + private boolean excludeSpecialFilter(LogicalFilter filter) { + // exclude __DORIS_DELETE_SIGN__ == 0/1 predicate + return !(filter.getConjuncts().size() == 1 + && filter.getConjuncts().iterator().next() instanceof EqualTo + && ((EqualTo) filter.getConjuncts().iterator().next()).child(0).getDataType().isTinyIntType() + && ((TinyIntLiteral) ((EqualTo) filter.getConjuncts().iterator().next()).child(0)).getValue() == 0 + && ((EqualTo) filter.getConjuncts().iterator().next()).child(1).isSlot() + && ((SlotReference) ((EqualTo) filter.getConjuncts().iterator().next()).child(1)).getName() + .equals("__DORIS_DELETE_SIGN__")); Review Comment: we could rewrite `__DORIS_DELETE_SIGN__ = 0/1` in sql explicitly, and all unique table has this column, to why need to exclude it? maybe move add `__DORIS_DELETE_SIGN__ = 0/1` rule after this rule is better? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org