This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 919d08a84f217d1d33391d1cd9e34065465d4312 Author: ttttttz <[email protected]> AuthorDate: Thu May 25 16:18:54 2023 +0800 IMPALA-12164: Avoid referencing non-materialized slots in analytic limit pushdown When creating single-node analytic plan, if the plan node is an EmptySetNode, its tuple ids should not be considered. Also, when registering conjuncts, if a constant FALSE conjunct is found, the other conjuncts in the same list should be marked as assigned. Tests: - Add FE and e2e regression tests Change-Id: I9e078f48863c38062e1e624a1ff3e9317092466f Reviewed-on: http://gerrit.cloudera.org:8080/19937 Reviewed-by: Quanlong Huang <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 32 +- .../org/apache/impala/planner/AnalyticPlanner.java | 9 +- .../limit-pushdown-partitioned-top-n.test | 331 +++++++++++++++++++++ .../queries/QueryTest/partitioned-top-n.test | 162 ++++++++++ 4 files changed, 526 insertions(+), 8 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index edcb9c003..ef4dd77cc 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -1865,6 +1865,7 @@ public class Analyzer { globalState_.conjunctsByOjClause.put(rhsRef.getId(), ojConjuncts); } } + boolean foundConstantFalse = false; for (Expr conjunct: conjuncts) { conjunct.setIsOnClauseConjunct(true); registerConjunct(conjunct); @@ -1878,7 +1879,16 @@ public class Analyzer { if (rhsRef.getJoinOp().isInnerJoin()) { globalState_.ijClauseByConjunct.put(conjunct.getId(), rhsRef); } - markConstantConjunct(conjunct, false); + if (markConstantConjunct(conjunct, false)) { + foundConstantFalse = true; + } + } + // If a constant FALSE conjunct is found, we don't need to evaluate the other + // conjuncts in the same list. By marking the conjunct as assigned, the + // getUnassignedConjuncts() method will not return it, which means we won't + // consider it anymore. + if (foundConstantFalse) { + markConjunctsAssigned(conjuncts); } } @@ -1888,9 +1898,18 @@ public class Analyzer { */ public void registerConjuncts(Expr e, boolean fromHavingClause) throws AnalysisException { - for (Expr conjunct: e.getConjuncts()) { + boolean foundConstantFalse = false; + for (Expr conjunct : e.getConjuncts()) { registerConjunct(conjunct); - markConstantConjunct(conjunct, fromHavingClause); + foundConstantFalse = markConstantConjunct(conjunct, fromHavingClause); + if (foundConstantFalse) break; + } + // If a constant FALSE conjunct is found, we don't need to evaluate the other + // conjuncts in the same list. By marking the conjunct as assigned, the + // getUnassignedConjuncts() method will not return it, which means we won't + // consider it anymore. + if (foundConstantFalse) { + markConjunctsAssigned(e.getConjuncts()); } } @@ -1900,11 +1919,12 @@ public class Analyzer { * block as having an empty result set or as having an empty select-project-join * portion, if fromHavingClause is true or false, respectively. * No-op if the conjunct is not constant or is outer joined. + * Return true, if conjunct is constant FALSE. * Throws an AnalysisException if there is an error evaluating `conjunct` */ - private void markConstantConjunct(Expr conjunct, boolean fromHavingClause) + private boolean markConstantConjunct(Expr conjunct, boolean fromHavingClause) throws AnalysisException { - if (!conjunct.isConstant() || isOjConjunct(conjunct)) return; + if (!conjunct.isConstant() || isOjConjunct(conjunct)) return false; markConjunctAssigned(conjunct); if ((!fromHavingClause && !hasEmptySpjResultSet_) || (fromHavingClause && !hasEmptyResultSet_)) { @@ -1925,11 +1945,13 @@ public class Analyzer { } else { hasEmptySpjResultSet_ = true; } + return true; } } catch (InternalException ex) { throw new AnalysisException("Error evaluating \"" + conjunct.toSql() + "\"", ex); } } + return false; } /** diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java index 77e0b8c7a..739f02330 100644 --- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java @@ -102,10 +102,13 @@ public class AnalyticPlanner { */ public PlanNode createSingleNodePlan(PlanNode root, List<Expr> groupingExprs, List<Expr> inputPartitionExprs) throws ImpalaException { - // Identify predicates that reference the logical analytic tuple (this logical - // analytic tuple is replaced by different physical ones during planning) + // If the plan node is not an EmptySetNode, identify predicates that reference the + // logical analytic tuple (this logical analytic tuple is replaced by different + // physical ones during planning) List<TupleId> tids = new ArrayList<>(); - tids.addAll(root.getTupleIds()); + if (!(root instanceof EmptySetNode)) { + tids.addAll(root.getTupleIds()); + } tids.add(analyticInfo_.getOutputTupleId()); List<Expr> analyticConjs = analyzer_.getUnassignedConjuncts(tids); if (LOG.isTraceEnabled()) { diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test index 3b9c65263..5cd4123f8 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test @@ -923,3 +923,334 @@ PLAN-ROOT SINK HDFS partitions=11/11 files=11 size=814.73KB row-size=4B cardinality=11.00K ==== +# Regression test for IMPALA-12164. Test non-materialized slots +# with BinaryPredicate. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col = 0 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BinaryPredicate and INNER-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BinaryPredicate and LEFT-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BinaryPredicate and FULL-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BetweenPredicate. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col between 0 and 10 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BetweenPredicate and INNER-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BetweenPredicate and LEFT-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with BetweenPredicate and FULL-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with InPredicate. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col in(1, 2, 3) and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with InPredicate and INNER-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with InPredicate and LEFT-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== +# Test non-materialized slots with InPredicate and FULL-JOIN. +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- PLAN +PLAN-ROOT SINK +| +03:SORT +| order by: id ASC +| row-size=12B cardinality=0 +| +02:ANALYTIC +| functions: rank() +| order by: id DESC +| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=12B cardinality=0 +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=0 +| +00:EMPTYSET +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test index 538d289f8..08a161d37 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test +++ b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test @@ -132,3 +132,165 @@ where id = max_id and rn < 10 ---- TYPES INT,BIGINT ==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BinaryPredicate +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col = 0 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BinaryPredicate and INNER-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BinaryPredicate and LEFT-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BinaryPredicate and FULL-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col =0 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BetweenPredicate +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col between 0 and 10 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BetweenPredicate and INNER-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BetweenPredicate and LEFT-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with BetweenPredicate and FULL-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col between 0 and 10 and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with InPredicate +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT id + FROM functional.alltypesagg + WHERE int_col in(1, 2, 3) and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with InPredicate and INNER-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with InPredicate and LEFT-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + LEFT JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +==== +---- QUERY +# IMPALA-12164: test non-materialized slots with InPredicate and FULL-JOIN +select id, + RANK() OVER(ORDER BY id DESC) AS rank_id + from (SELECT tbl_0.id + FROM functional.alltypesagg tbl_0 + FULL JOIN functional.alltypesagg tbl_1 + ON tbl_0.id = tbl_1.id + WHERE tbl_0.int_col in(1, 2, 3) and false) alias_0 + order by id +---- RESULTS +---- TYPES +INT,BIGINT +====
