This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new da9400d63 IMPALA-13302: Restore registering all conjuncts
da9400d63 is described below

commit da9400d63ce62e37ea531e7fe6564bf4ac2e0e45
Author: Michael Smith <[email protected]>
AuthorDate: Thu Aug 15 16:03:30 2024 -0700

    IMPALA-13302: Restore registering all conjuncts
    
    Reverts an optimization in IMPALA-12164 that skipped registering
    remaining conjuncts if they were expected to be removed by a rewrite
    rule, then call markConjunctsAssigned on them.
    
    In some cases the rewrite rule is not applied in the first pass, only
    during reAnalyze. During the first pass, the Expr would have
    registerConjunct called and an ID assigned because there were no
    constant false conjuncts.
    
    During reAnalyze, there would be a constant false conjunct after
    rewrite, so the Expr would skip registerConjunct but then have
    markConjunctsAssigned with its existing ID. Later a new Expr gets the
    same ID from registerConjunct and skips markConjunctsAssigned because
    it's believed to already be assigned, skipping slot materialization.
    
    Incomplete rule rewriting is handled separately as IMPALA-13344.
    
    Logs tuple id for eqJoinConjunct and cleans up logging calls with
    parameter substitution. Also returns more slot context on Precondition.
    
    Adds ExprRewriteRules tests that previously hit the new Precondition in
    markConjunctAssigned, and rewrite PlannerTest.
    
    Change-Id: I5959a3b3e18302e00b1d37e5f50410ebdb224cb0
    Reviewed-on: http://gerrit.cloudera.org:8080/21684
    Reviewed-by: Riza Suminto <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  25 +++-
 .../java/org/apache/impala/analysis/SlotRef.java   |   5 +-
 .../impala/analysis/ExprRewriteRulesTest.java      |  37 ++++++
 .../org/apache/impala/planner/PlannerTest.java     |   5 +
 .../queries/PlannerTest/rewrite.test               | 129 +++++++++++++++++++++
 5 files changed, 193 insertions(+), 8 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index d7a5894a5..839e0a853 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1972,8 +1972,11 @@ public class Analyzer {
     boolean foundConstantFalse = false;
     for (Expr conjunct : e.getConjuncts()) {
       registerConjunct(conjunct);
-      foundConstantFalse = markConstantConjunct(conjunct, fromHavingClause);
-      if (foundConstantFalse) break;
+      // IMPALA-13302 TODO: review whether markConstantConjunct can be skipped 
once
+      // foundConstantFalse=true.
+      if (markConstantConjunct(conjunct, fromHavingClause)) {
+        foundConstantFalse = true;
+      }
     }
     // If a constant FALSE conjunct is found, we don't need to evaluate the 
other
     // conjuncts in the same list. By marking the conjunct as assigned, the
@@ -2050,8 +2053,8 @@ public class Analyzer {
     }
 
     if (LOG.isTraceEnabled()) {
-      LOG.trace("register tuple/slotConjunct: " + 
Integer.toString(e.getId().asInt())
-          + " " + e.toSql() + " " + e.debugString());
+      LOG.trace("register tuple/slotConjunct: {} {} {}", e.getId(),
+          e.toSql(), e.debugString());
     }
 
     if (!(e instanceof BinaryPredicate)) return;
@@ -2081,7 +2084,8 @@ public class Analyzer {
           globalState_.eqJoinConjuncts.get(tupleIds.get(0)).add(e.getId());
         }
         binaryPred.setIsEqJoinConjunct(true);
-        LOG.trace("register eqJoinConjunct: " + 
Integer.toString(e.getId().asInt()));
+        LOG.trace("register eqJoinConjunct {} with tuple id {}",
+            e.getId(), tupleIds.get(0));
       }
     }
   }
@@ -3344,6 +3348,17 @@ public class Analyzer {
    * Mark predicate as assigned.
    */
   public void markConjunctAssigned(Expr conjunct) {
+    // If the supplied conjunct is not registered with globalState_, it was 
created
+    // earlier and this is re-analysis. If we're marking it now, that can 
cause conflicts
+    // with an Expr that gets assigned the same ID, usually skipping slot 
materialization.
+    // Detect that early; it usually means a new predicate was created but not 
analyzed.
+    // TODO IMPALA-13365: switch to
+    //   conjunct.equals(globalState_.conjuncts.get(conjunct.getId()))
+    // and figure out why we get conjuncts that don't satisfy that condition.
+    Preconditions.checkArgument(
+        conjunct.getId() == null || 
globalState_.conjuncts.containsKey(conjunct.getId()),
+        "conjunct is not registered in current analyzer: %s", conjunct);
+
     globalState_.assignedConjuncts.add(conjunct.getId());
     if (Predicate.isEquivalencePredicate(conjunct)) {
       BinaryPredicate binaryPred = (BinaryPredicate) conjunct;
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java 
b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index b6dbd1fa9..f1ede3235 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -328,9 +328,8 @@ public class SlotRef extends Expr {
     msg.node_type = TExprNodeType.SLOT_REF;
     msg.slot_ref = new 
TSlotRef(serialCtx.translateSlotId(desc_.getId()).asInt());
     // we shouldn't be sending exprs over non-materialized slots
-    Preconditions.checkState(desc_.isMaterialized(), String.format(
-        "Illegal reference to non-materialized slot: tid=%s sid=%s",
-        desc_.getParent().getId(), desc_.getId()));
+    Preconditions.checkState(desc_.isMaterialized(),
+        "Illegal reference to non-materialized slot: %s", this);
     Preconditions.checkState(desc_.getByteOffset() >= 0);
     // check that the tuples associated with this slot are executable
     desc_.getParent().checkIsExecutable();
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 65fccab41..4472cc61b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -451,6 +451,43 @@ public class ExprRewriteRulesTest extends FrontendTestBase 
{
     RewritesOk("true AND id = 0 OR false", rules, "id = 0");
   }
 
+  /**
+   * Sets up a framework for re-analysis that triggers conjunct ID conflicts 
if rewrite
+   * rules don't analyze new predicates. It requires a union of SELECT WHERE 
clauses,
+   * with one of the clauses requiring multiple rewrite passes.
+   */
+  private String rewriteTemplate(String whereClause) {
+    return "SELECT 1 FROM functional.alltypes t WHERE t.id = 1 UNION ALL " +
+        "SELECT 1 FROM functional.alltypes t WHERE " + whereClause;
+  }
+
+  /**
+   * IMPALA-13302: Test that compound predicate rewrites - combined with 
rewrites that
+   * produce a simplifiable predicate - don't leave unanalyzed conjuncts for 
re-analysis.
+   */
+  @Test
+  public void testCompoundPredicateWithRewriteAndReanalyze() throws 
ImpalaException {
+    AnalysisContext ctx = createAnalysisCtx();
+    ctx.getQueryOptions().setEnable_expr_rewrites(true);
+
+    String[] cases = {
+      // NormalizeExprsRule should simplify to FALSE AND ...
+      rewriteTemplate("t.id = 1 AND t.id = 1 AND false"),
+      // ExtractCommonConjunctRule subset should simplify to FALSE AND ...
+      rewriteTemplate("((t.id = 1 AND false) or (t.id = 1 AND false)) AND t.id 
= 1"),
+      // ExtractCommonConjunctRule distjunct should simplify to FALSE AND ...
+      rewriteTemplate("((t.id = 1 AND false) or (t.id = 2 AND false)) AND t.id 
= 1"),
+      // BetweenToCompoundRule should simplify to FALSE AND ...
+      rewriteTemplate("(1 BETWEEN 2 AND 3) AND t.id = 1 AND t.id = 1"),
+      // SimplifyDistinctFromRule should simplify to FALSE AND ...
+      rewriteTemplate("t.id IS DISTINCT FROM t.id AND t.id = 1")
+    };
+
+    for (String sql : cases) {
+      AnalyzesOk(sql, ctx);
+    }
+  }
+
   @Test
   public void testCaseWithExpr() throws ImpalaException {
     ExprRewriteRule rule = SimplifyConditionalsRule.INSTANCE;
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 014712758..8c27338a9 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -491,6 +491,11 @@ public class PlannerTest extends PlannerTestBase {
     runPlannerTestFile("setoperation-rewrite", options);
   }
 
+  @Test
+  public void testRewrite() {
+    runPlannerTestFile("rewrite");
+  }
+
   @Test
   public void testValues() {
     runPlannerTestFile("values");
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test 
b/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test
new file mode 100644
index 000000000..d2e71fa9c
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test
@@ -0,0 +1,129 @@
+# IMPALA-13302: ExtractCommonConjunctRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+    WHERE ((t.id = 1 and false)
+        or (t.id = 1 and false))
+      AND t.id = 1
+      AND t.id = 1),
+  w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON 
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: t1.id = t2.id
+|  |  runtime filters: RF002 <- t2.id
+|  |  row-size=8B cardinality=6.12K
+|  |
+|  |--07:SCAN HDFS [functional.alltypes t2]
+|  |     HDFS partitions=24/24 files=24 size=478.45KB
+|  |     row-size=4B cardinality=6.12K
+|  |
+|  06:SCAN HDFS [functional.alltypes t1]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF002 -> t1.id
+|     row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+|  hash predicates: t1.id = t2.id
+|  runtime filters: RF000 <- t2.id
+|  row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> t1.id
+   row-size=4B cardinality=6.12K
+====
+# IMPALA-13203: NormalizeExprsRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+    WHERE t.id = 1
+      AND t.id = 1
+      AND t.id = 1
+      AND false),
+  w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON 
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: t1.id = t2.id
+|  |  runtime filters: RF002 <- t2.id
+|  |  row-size=8B cardinality=6.12K
+|  |
+|  |--07:SCAN HDFS [functional.alltypes t2]
+|  |     HDFS partitions=24/24 files=24 size=478.45KB
+|  |     row-size=4B cardinality=6.12K
+|  |
+|  06:SCAN HDFS [functional.alltypes t1]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF002 -> t1.id
+|     row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+|  hash predicates: t1.id = t2.id
+|  runtime filters: RF000 <- t2.id
+|  row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> t1.id
+   row-size=4B cardinality=6.12K
+====
+# IMPALA-13203: NormalizeExprsRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+    WHERE false
+      AND t.id = 1
+      AND t.id = 1
+      AND t.id = 1),
+  w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON 
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+|  |  hash predicates: t1.id = t2.id
+|  |  runtime filters: RF002 <- t2.id
+|  |  row-size=8B cardinality=6.12K
+|  |
+|  |--07:SCAN HDFS [functional.alltypes t2]
+|  |     HDFS partitions=24/24 files=24 size=478.45KB
+|  |     row-size=4B cardinality=6.12K
+|  |
+|  06:SCAN HDFS [functional.alltypes t1]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     runtime filters: RF002 -> t1.id
+|     row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+|  hash predicates: t1.id = t2.id
+|  runtime filters: RF000 <- t2.id
+|  row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> t1.id
+   row-size=4B cardinality=6.12K
+====

Reply via email to