This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b195b12cab1821f3ab657c37a2ee3a8afcd5bf98
Author: Steve Carlin <[email protected]>
AuthorDate: Wed Oct 16 09:35:40 2024 -0700

    IMPALA-13455: Put expressions in CNF form for performance.
    
    The optimizer now runs a rule to put expressions in conjunctive
    normal form.
    
    This commit will allow tpcds and tpch tests to run through without
    hanging, specifically queries 13 and 48.
    
    Change-Id: Iceca22f3b2d2b59ab21591f21c07650bbd8efb3c
    Reviewed-on: http://gerrit.cloudera.org:8080/21938
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 .../impala/calcite/rules/ConvertToCNFRules.java    | 137 +++++++++++++++++++++
 .../impala/calcite/service/CalciteOptimizer.java   |  25 +++-
 2 files changed, 161 insertions(+), 1 deletion(-)

diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java
new file mode 100644
index 000000000..5d9582ed3
--- /dev/null
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rules/ConvertToCNFRules.java
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.calcite.rules;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalJoin;
+import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+
+/**
+ * Rules to convert RexNode expressions into conjunctive normal form. These
+ * are required rules for tpcds queries. Without these rules, the performance 
for
+ * tpcds slows down drastically.
+ */
+public class ConvertToCNFRules {
+
+  public static class FilterConvertToCNFRule extends RelOptRule {
+    public FilterConvertToCNFRule() {
+      super(operand(LogicalFilter.class, none()));
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      final LogicalFilter filter = call.rel(0);
+
+      RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
+      RexNode condition = filter.getCondition();
+      RexNode newCondition = toCnf(rexBuilder, condition);
+      if (!newCondition.equals(condition)) {
+        LogicalFilter newFilter = LogicalFilter.create(filter.getInput(0), 
newCondition);
+        call.transformTo(newFilter);
+      }
+    }
+  }
+
+  public static class ProjectConvertToCNFRule extends RelOptRule {
+    public ProjectConvertToCNFRule() {
+        super(operand(LogicalProject.class, none()));
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      final LogicalProject project = call.rel(0);
+
+      List<RexNode> newProjects = new ArrayList<>();
+      boolean projectChanged = false;
+      RexBuilder rexBuilder = project.getCluster().getRexBuilder();
+      for (RexNode rexNode : project.getProjects()) {
+        RexNode newProject = toCnf(rexBuilder, rexNode);
+        newProjects.add(newProject);
+        projectChanged |= (!newProject.equals(rexNode));
+      }
+      if (projectChanged) {
+        LogicalProject newProject = LogicalProject.create(project.getInput(0),
+             project.getHints(), newProjects,
+             project.getRowType().getFieldNames(), project.getVariablesSet());
+        call.transformTo(newProject);
+      }
+    }
+  }
+
+  public static class JoinConvertToCNFRule extends RelOptRule {
+
+    public JoinConvertToCNFRule() {
+        super(operand(LogicalJoin.class, none()));
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      final LogicalJoin join = call.rel(0);
+      RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+      RexNode condition = join.getCondition();
+      RexNode newCondition = toCnf(rexBuilder, condition);
+      if (!newCondition.equals(condition)) {
+        LogicalJoin newJoin = LogicalJoin.create(join.getInput(0), 
join.getInput(1),
+            join.getHints(), newCondition, new HashSet<>(), join.getJoinType(),
+            join.isSemiJoinDone(), 
ImmutableList.copyOf(join.getSystemFieldList()));
+        call.transformTo(newJoin);
+      }
+    }
+  }
+
+  private static class ConvertToCNFShuttle extends RexShuttle {
+    private final RexBuilder rexBuilder;
+    public boolean fixedOperator = false;
+
+    public ConvertToCNFShuttle (RexBuilder rexBuilder) {
+      this.rexBuilder = rexBuilder;
+    }
+
+    @Override
+    public RexNode visitCall(RexCall call) {
+      RexCall cnfCall = (RexCall) RexUtil.toCnf(rexBuilder, call);
+      return super.visitCall(cnfCall);
+    }
+  }
+
+  private static RexNode toCnf(RexBuilder rexBuilder, RexNode rexNode) {
+    if (rexNode instanceof RexLiteral) {
+      return rexNode;
+    }
+    RexNode expandedCondition = RexUtil.expandSearch(rexBuilder, null, 
rexNode);
+    RexNode newCondition = RexUtil.pullFactors(rexBuilder, expandedCondition);
+    // TODO: IMPALA-13436: use max_cnf_exprs query option instead of hardcoded 
100.
+    // The default for max_cnf_exprs is 200, but we use 100 here because tpcds
+    // q41 is super slow when the value is at 200.
+    return RexUtil.toCnf(rexBuilder, 100, newCondition);
+  }
+}
diff --git 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
index 60f4d89fe..fe0f74407 100644
--- 
a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
+++ 
b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteOptimizer.java
@@ -33,6 +33,7 @@ import org.apache.calcite.tools.RelBuilder;
 import org.apache.impala.calcite.coercenodes.CoerceNodes;
 import org.apache.impala.calcite.rel.node.ConvertToImpalaRelRules;
 import org.apache.impala.calcite.rel.node.ImpalaPlanRel;
+import org.apache.impala.calcite.rules.ConvertToCNFRules;
 import org.apache.impala.common.ImpalaException;
 
 import org.slf4j.Logger;
@@ -57,7 +58,12 @@ public class CalciteOptimizer implements CompilerStep {
     RelBuilder relBuilder = 
RelFactories.LOGICAL_BUILDER.create(logPlan.getCluster(),
         validator_.getCatalogReader());
 
-    RelNode optimizedPlan = runJoinProgram(relBuilder, logPlan);
+    // Run some essential rules needed to create working RelNodes before doing
+    // optimization
+    RelNode expandedNodesPlan = runExpandNodesProgram(relBuilder, logPlan);
+
+    // Run essential Join Node rules
+    RelNode optimizedPlan = runJoinProgram(relBuilder, expandedNodesPlan);
 
     ImpalaPlanRel finalOptimizedPlan =
         runImpalaConvertProgram(relBuilder, optimizedPlan);
@@ -65,6 +71,23 @@ public class CalciteOptimizer implements CompilerStep {
     return finalOptimizedPlan;
   }
 
+  public RelNode runExpandNodesProgram(RelBuilder relBuilder,
+      RelNode plan) throws ImpalaException {
+
+    HepProgramBuilder builder = new HepProgramBuilder();
+
+    builder.addMatchOrder(HepMatchOrder.BOTTOM_UP);
+    builder.addRuleCollection(ImmutableList.of(
+        new ConvertToCNFRules.FilterConvertToCNFRule(),
+        new ConvertToCNFRules.JoinConvertToCNFRule(),
+        new ConvertToCNFRules.ProjectConvertToCNFRule()
+        ));
+
+    builder.addMatchOrder(HepMatchOrder.BOTTOM_UP);
+
+    return runProgram(plan, builder.build());
+  }
+
   /**
    * Run the rules specifically for join ordering.
    *

Reply via email to