This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 919d08a84f217d1d33391d1cd9e34065465d4312
Author: ttttttz <[email protected]>
AuthorDate: Thu May 25 16:18:54 2023 +0800

    IMPALA-12164: Avoid referencing non-materialized slots in analytic limit 
pushdown
    
    When creating single-node analytic plan, if the plan node is an
    EmptySetNode, its tuple ids should not be considered. Also, when
    registering conjuncts, if a constant FALSE conjunct is found, the
    other conjuncts in the same list should be marked as assigned.
    
    Tests:
     - Add FE and e2e regression tests
    
    Change-Id: I9e078f48863c38062e1e624a1ff3e9317092466f
    Reviewed-on: http://gerrit.cloudera.org:8080/19937
    Reviewed-by: Quanlong Huang <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  32 +-
 .../org/apache/impala/planner/AnalyticPlanner.java |   9 +-
 .../limit-pushdown-partitioned-top-n.test          | 331 +++++++++++++++++++++
 .../queries/QueryTest/partitioned-top-n.test       | 162 ++++++++++
 4 files changed, 526 insertions(+), 8 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java 
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index edcb9c003..ef4dd77cc 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1865,6 +1865,7 @@ public class Analyzer {
         globalState_.conjunctsByOjClause.put(rhsRef.getId(), ojConjuncts);
       }
     }
+    boolean foundConstantFalse = false;
     for (Expr conjunct: conjuncts) {
       conjunct.setIsOnClauseConjunct(true);
       registerConjunct(conjunct);
@@ -1878,7 +1879,16 @@ public class Analyzer {
       if (rhsRef.getJoinOp().isInnerJoin()) {
         globalState_.ijClauseByConjunct.put(conjunct.getId(), rhsRef);
       }
-      markConstantConjunct(conjunct, false);
+      if (markConstantConjunct(conjunct, false)) {
+        foundConstantFalse = true;
+      }
+    }
+    // If a constant FALSE conjunct is found, we don't need to evaluate the 
other
+    // conjuncts in the same list. By marking the conjunct as assigned, the
+    // getUnassignedConjuncts() method will not return it, which means we won't
+    // consider it anymore.
+    if (foundConstantFalse) {
+      markConjunctsAssigned(conjuncts);
     }
   }
 
@@ -1888,9 +1898,18 @@ public class Analyzer {
    */
   public void registerConjuncts(Expr e, boolean fromHavingClause)
       throws AnalysisException {
-    for (Expr conjunct: e.getConjuncts()) {
+    boolean foundConstantFalse = false;
+    for (Expr conjunct : e.getConjuncts()) {
       registerConjunct(conjunct);
-      markConstantConjunct(conjunct, fromHavingClause);
+      foundConstantFalse = markConstantConjunct(conjunct, fromHavingClause);
+      if (foundConstantFalse) break;
+    }
+    // If a constant FALSE conjunct is found, we don't need to evaluate the 
other
+    // conjuncts in the same list. By marking the conjunct as assigned, the
+    // getUnassignedConjuncts() method will not return it, which means we won't
+    // consider it anymore.
+    if (foundConstantFalse) {
+      markConjunctsAssigned(e.getConjuncts());
     }
   }
 
@@ -1900,11 +1919,12 @@ public class Analyzer {
    * block as having an empty result set or as having an empty 
select-project-join
    * portion, if fromHavingClause is true or false, respectively.
    * No-op if the conjunct is not constant or is outer joined.
+   * Return true, if conjunct is constant FALSE.
    * Throws an AnalysisException if there is an error evaluating `conjunct`
    */
-  private void markConstantConjunct(Expr conjunct, boolean fromHavingClause)
+  private boolean markConstantConjunct(Expr conjunct, boolean fromHavingClause)
       throws AnalysisException {
-    if (!conjunct.isConstant() || isOjConjunct(conjunct)) return;
+    if (!conjunct.isConstant() || isOjConjunct(conjunct)) return false;
     markConjunctAssigned(conjunct);
     if ((!fromHavingClause && !hasEmptySpjResultSet_)
         || (fromHavingClause && !hasEmptyResultSet_)) {
@@ -1925,11 +1945,13 @@ public class Analyzer {
           } else {
             hasEmptySpjResultSet_ = true;
           }
+          return true;
         }
       } catch (InternalException ex) {
         throw new AnalysisException("Error evaluating \"" + conjunct.toSql() + 
"\"", ex);
       }
     }
+    return false;
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java 
b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
index 77e0b8c7a..739f02330 100644
--- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java
@@ -102,10 +102,13 @@ public class AnalyticPlanner {
    */
   public PlanNode createSingleNodePlan(PlanNode root,
       List<Expr> groupingExprs, List<Expr> inputPartitionExprs) throws 
ImpalaException {
-    // Identify predicates that reference the logical analytic tuple (this 
logical
-    // analytic tuple is replaced by different physical ones during planning)
+    // If the plan node is not an EmptySetNode, identify predicates that 
reference the
+    // logical analytic tuple (this logical analytic tuple is replaced by 
different
+    // physical ones during planning)
     List<TupleId> tids = new ArrayList<>();
-    tids.addAll(root.getTupleIds());
+    if (!(root instanceof EmptySetNode)) {
+      tids.addAll(root.getTupleIds());
+    }
     tids.add(analyticInfo_.getOutputTupleId());
     List<Expr> analyticConjs = analyzer_.getUnassignedConjuncts(tids);
     if (LOG.isTraceEnabled()) {
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
index 3b9c65263..5cd4123f8 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test
@@ -923,3 +923,334 @@ PLAN-ROOT SINK
    HDFS partitions=11/11 files=11 size=814.73KB
    row-size=4B cardinality=11.00K
 ====
+# Regression test for IMPALA-12164. Test non-materialized slots
+# with BinaryPredicate.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col = 0 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BinaryPredicate and INNER-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BinaryPredicate and LEFT-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BinaryPredicate and FULL-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BetweenPredicate.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col between 0 and 10 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BetweenPredicate and INNER-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BetweenPredicate and LEFT-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with BetweenPredicate and FULL-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with InPredicate.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with InPredicate and INNER-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with InPredicate and LEFT-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
+# Test non-materialized slots with InPredicate and FULL-JOIN.
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- PLAN
+PLAN-ROOT SINK
+|
+03:SORT
+|  order by: id ASC
+|  row-size=12B cardinality=0
+|
+02:ANALYTIC
+|  functions: rank()
+|  order by: id DESC
+|  window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  row-size=12B cardinality=0
+|
+01:SORT
+|  order by: id DESC
+|  row-size=4B cardinality=0
+|
+00:EMPTYSET
+====
\ No newline at end of file
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test 
b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
index 538d289f8..08a161d37 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test
@@ -132,3 +132,165 @@ where id = max_id and rn < 10
 ---- TYPES
 INT,BIGINT
 ====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BinaryPredicate
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col = 0 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BinaryPredicate and INNER-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BinaryPredicate and LEFT-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BinaryPredicate and FULL-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col =0 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BetweenPredicate
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col between 0 and 10 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BetweenPredicate and 
INNER-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BetweenPredicate and LEFT-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with BetweenPredicate and FULL-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col between 0 and 10 and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with InPredicate
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT id
+          FROM functional.alltypesagg
+        WHERE int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with InPredicate and INNER-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with InPredicate and LEFT-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          LEFT JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====
+---- QUERY
+# IMPALA-12164: test non-materialized slots with InPredicate and FULL-JOIN
+select id,
+       RANK() OVER(ORDER BY id DESC) AS rank_id
+  from (SELECT tbl_0.id
+          FROM functional.alltypesagg tbl_0
+          FULL JOIN functional.alltypesagg tbl_1
+          ON tbl_0.id = tbl_1.id
+        WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0
+  order by id
+---- RESULTS
+---- TYPES
+INT,BIGINT
+====

Reply via email to