This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit b195b12cab1821f3ab657c37a2ee3a8afcd5bf98 Author: Steve Carlin <[email protected]> AuthorDate: Wed Oct 16 09:35:40 2024 -0700 IMPALA-13455: Put expressions in CNF form for performance. The optimizer now runs a rule to put expressions in conjunctive normal form. This commit will allow tpcds and tpch tests to run through without hanging, specifically queries 13 and 48. Change-Id: Iceca22f3b2d2b59ab21591f21c07650bbd8efb3c Reviewed-on: http://gerrit.cloudera.org:8080/21938 Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- .../impala/calcite/rules/ConvertToCNFRules.java | 137 +++++++++++++++++++++ .../impala/calcite/service/CalciteOptimizer.java | 25 +++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java new file mode 100644 index 000000000..5d9582ed3 --- /dev/null +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.calcite.rules; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; + +/** + * Rules to convert RexNode expressions into conjunctive normal form. These + * are required rules for tpcds queries. Without these rules, the performance for + * tpcds slows down drastically. + */ +public class ConvertToCNFRules { + + public static class FilterConvertToCNFRule extends RelOptRule { + public FilterConvertToCNFRule() { + super(operand(LogicalFilter.class, none())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalFilter filter = call.rel(0); + + RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + RexNode condition = filter.getCondition(); + RexNode newCondition = toCnf(rexBuilder, condition); + if (!newCondition.equals(condition)) { + LogicalFilter newFilter = LogicalFilter.create(filter.getInput(0), newCondition); + call.transformTo(newFilter); + } + } + } + + public static class ProjectConvertToCNFRule extends RelOptRule { + public ProjectConvertToCNFRule() { + super(operand(LogicalProject.class, none())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalProject project = call.rel(0); + + List<RexNode> newProjects = new ArrayList<>(); + boolean projectChanged = false; + RexBuilder rexBuilder = project.getCluster().getRexBuilder(); + for (RexNode rexNode : project.getProjects()) { + RexNode newProject = toCnf(rexBuilder, rexNode); + newProjects.add(newProject); + projectChanged |= (!newProject.equals(rexNode)); + } + if (projectChanged) { + LogicalProject newProject = LogicalProject.create(project.getInput(0), + project.getHints(), newProjects, + project.getRowType().getFieldNames(), project.getVariablesSet()); + call.transformTo(newProject); + } + } + } + + public static class JoinConvertToCNFRule extends RelOptRule { + + public JoinConvertToCNFRule() { + super(operand(LogicalJoin.class, none())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalJoin join = call.rel(0); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + RexNode condition = join.getCondition(); + RexNode newCondition = toCnf(rexBuilder, condition); + if (!newCondition.equals(condition)) { + LogicalJoin newJoin = LogicalJoin.create(join.getInput(0), join.getInput(1), + join.getHints(), newCondition, new HashSet<>(), join.getJoinType(), + join.isSemiJoinDone(), ImmutableList.copyOf(join.getSystemFieldList())); + call.transformTo(newJoin); + } + } + } + + private static class ConvertToCNFShuttle extends RexShuttle { + private final RexBuilder rexBuilder; + public boolean fixedOperator = false; + + public ConvertToCNFShuttle (RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + @Override + public RexNode visitCall(RexCall call) { + RexCall cnfCall = (RexCall) RexUtil.toCnf(rexBuilder, call); + return super.visitCall(cnfCall); + } + } + + private static RexNode toCnf(RexBuilder rexBuilder, RexNode rexNode) { + if (rexNode instanceof RexLiteral) { + return rexNode; + } + RexNode expandedCondition = RexUtil.expandSearch(rexBuilder, null, rexNode); + RexNode newCondition = RexUtil.pullFactors(rexBuilder, expandedCondition); + // TODO: IMPALA-13436: use max_cnf_exprs query option instead of hardcoded 100. + // The default for max_cnf_exprs is 200, but we use 100 here because tpcds + // q41 is super slow when the value is at 200. + return RexUtil.toCnf(rexBuilder, 100, newCondition); + } +} diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java index 60f4d89fe..fe0f74407 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java @@ -33,6 +33,7 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.impala.calcite.coercenodes.CoerceNodes; import org.apache.impala.calcite.rel.node.ConvertToImpalaRelRules; import org.apache.impala.calcite.rel.node.ImpalaPlanRel; +import org.apache.impala.calcite.rules.ConvertToCNFRules; import org.apache.impala.common.ImpalaException; import org.slf4j.Logger; @@ -57,7 +58,12 @@ public class CalciteOptimizer implements CompilerStep { RelBuilder relBuilder = RelFactories.LOGICAL_BUILDER.create(logPlan.getCluster(), validator_.getCatalogReader()); - RelNode optimizedPlan = runJoinProgram(relBuilder, logPlan); + // Run some essential rules needed to create working RelNodes before doing + // optimization + RelNode expandedNodesPlan = runExpandNodesProgram(relBuilder, logPlan); + + // Run essential Join Node rules + RelNode optimizedPlan = runJoinProgram(relBuilder, expandedNodesPlan); ImpalaPlanRel finalOptimizedPlan = runImpalaConvertProgram(relBuilder, optimizedPlan); @@ -65,6 +71,23 @@ public class CalciteOptimizer implements CompilerStep { return finalOptimizedPlan; } + public RelNode runExpandNodesProgram(RelBuilder relBuilder, + RelNode plan) throws ImpalaException { + + HepProgramBuilder builder = new HepProgramBuilder(); + + builder.addMatchOrder(HepMatchOrder.BOTTOM_UP); + builder.addRuleCollection(ImmutableList.of( + new ConvertToCNFRules.FilterConvertToCNFRule(), + new ConvertToCNFRules.JoinConvertToCNFRule(), + new ConvertToCNFRules.ProjectConvertToCNFRule() + )); + + builder.addMatchOrder(HepMatchOrder.BOTTOM_UP); + + return runProgram(plan, builder.build()); + } + /** * Run the rules specifically for join ordering. *
