This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new da9400d63 IMPALA-13302: Restore registering all conjuncts
da9400d63 is described below
commit da9400d63ce62e37ea531e7fe6564bf4ac2e0e45
Author: Michael Smith <[email protected]>
AuthorDate: Thu Aug 15 16:03:30 2024 -0700
IMPALA-13302: Restore registering all conjuncts
Reverts an optimization in IMPALA-12164 that skipped registering
remaining conjuncts if they were expected to be removed by a rewrite
rule, then call markConjunctsAssigned on them.
In some cases the rewrite rule is not applied in the first pass, only
during reAnalyze. During the first pass, the Expr would have
registerConjunct called and an ID assigned because there were no
constant false conjuncts.
During reAnalyze, there would be a constant false conjunct after
rewrite, so the Expr would skip registerConjunct but then have
markConjunctsAssigned with its existing ID. Later a new Expr gets the
same ID from registerConjunct and skips markConjunctsAssigned because
it's believed to already be assigned, skipping slot materialization.
Incomplete rule rewriting is handled separately as IMPALA-13344.
Logs tuple id for eqJoinConjunct and cleans up logging calls with
parameter substitution. Also returns more slot context on Precondition.
Adds ExprRewriteRules tests that previously hit the new Precondition in
markConjunctAssigned, and rewrite PlannerTest.
Change-Id: I5959a3b3e18302e00b1d37e5f50410ebdb224cb0
Reviewed-on: http://gerrit.cloudera.org:8080/21684
Reviewed-by: Riza Suminto <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../java/org/apache/impala/analysis/Analyzer.java | 25 +++-
.../java/org/apache/impala/analysis/SlotRef.java | 5 +-
.../impala/analysis/ExprRewriteRulesTest.java | 37 ++++++
.../org/apache/impala/planner/PlannerTest.java | 5 +
.../queries/PlannerTest/rewrite.test | 129 +++++++++++++++++++++
5 files changed, 193 insertions(+), 8 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index d7a5894a5..839e0a853 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1972,8 +1972,11 @@ public class Analyzer {
boolean foundConstantFalse = false;
for (Expr conjunct : e.getConjuncts()) {
registerConjunct(conjunct);
- foundConstantFalse = markConstantConjunct(conjunct, fromHavingClause);
- if (foundConstantFalse) break;
+ // IMPALA-13302 TODO: review whether markConstantConjunct can be skipped
once
+ // foundConstantFalse=true.
+ if (markConstantConjunct(conjunct, fromHavingClause)) {
+ foundConstantFalse = true;
+ }
}
// If a constant FALSE conjunct is found, we don't need to evaluate the
other
// conjuncts in the same list. By marking the conjunct as assigned, the
@@ -2050,8 +2053,8 @@ public class Analyzer {
}
if (LOG.isTraceEnabled()) {
- LOG.trace("register tuple/slotConjunct: " +
Integer.toString(e.getId().asInt())
- + " " + e.toSql() + " " + e.debugString());
+ LOG.trace("register tuple/slotConjunct: {} {} {}", e.getId(),
+ e.toSql(), e.debugString());
}
if (!(e instanceof BinaryPredicate)) return;
@@ -2081,7 +2084,8 @@ public class Analyzer {
globalState_.eqJoinConjuncts.get(tupleIds.get(0)).add(e.getId());
}
binaryPred.setIsEqJoinConjunct(true);
- LOG.trace("register eqJoinConjunct: " +
Integer.toString(e.getId().asInt()));
+ LOG.trace("register eqJoinConjunct {} with tuple id {}",
+ e.getId(), tupleIds.get(0));
}
}
}
@@ -3344,6 +3348,17 @@ public class Analyzer {
* Mark predicate as assigned.
*/
public void markConjunctAssigned(Expr conjunct) {
+ // If the supplied conjunct is not registered with globalState_, it was
created
+ // earlier and this is re-analysis. If we're marking it now, that can
cause conflicts
+ // with an Expr that gets assigned the same ID, usually skipping slot
materialization.
+ // Detect that early; it usually means a new predicate was created but not
analyzed.
+ // TODO IMPALA-13365: switch to
+ // conjunct.equals(globalState_.conjuncts.get(conjunct.getId()))
+ // and figure out why we get conjuncts that don't satisfy that condition.
+ Preconditions.checkArgument(
+ conjunct.getId() == null ||
globalState_.conjuncts.containsKey(conjunct.getId()),
+ "conjunct is not registered in current analyzer: %s", conjunct);
+
globalState_.assignedConjuncts.add(conjunct.getId());
if (Predicate.isEquivalencePredicate(conjunct)) {
BinaryPredicate binaryPred = (BinaryPredicate) conjunct;
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index b6dbd1fa9..f1ede3235 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -328,9 +328,8 @@ public class SlotRef extends Expr {
msg.node_type = TExprNodeType.SLOT_REF;
msg.slot_ref = new
TSlotRef(serialCtx.translateSlotId(desc_.getId()).asInt());
// we shouldn't be sending exprs over non-materialized slots
- Preconditions.checkState(desc_.isMaterialized(), String.format(
- "Illegal reference to non-materialized slot: tid=%s sid=%s",
- desc_.getParent().getId(), desc_.getId()));
+ Preconditions.checkState(desc_.isMaterialized(),
+ "Illegal reference to non-materialized slot: %s", this);
Preconditions.checkState(desc_.getByteOffset() >= 0);
// check that the tuples associated with this slot are executable
desc_.getParent().checkIsExecutable();
diff --git
a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 65fccab41..4472cc61b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -451,6 +451,43 @@ public class ExprRewriteRulesTest extends FrontendTestBase
{
RewritesOk("true AND id = 0 OR false", rules, "id = 0");
}
+ /**
+ * Sets up a framework for re-analysis that triggers conjunct ID conflicts
if rewrite
+ * rules don't analyze new predicates. It requires a union of SELECT WHERE
clauses,
+ * with one of the clauses requiring multiple rewrite passes.
+ */
+ private String rewriteTemplate(String whereClause) {
+ return "SELECT 1 FROM functional.alltypes t WHERE t.id = 1 UNION ALL " +
+ "SELECT 1 FROM functional.alltypes t WHERE " + whereClause;
+ }
+
+ /**
+ * IMPALA-13302: Test that compound predicate rewrites - combined with
rewrites that
+ * produce a simplifiable predicate - don't leave unanalyzed conjuncts for
re-analysis.
+ */
+ @Test
+ public void testCompoundPredicateWithRewriteAndReanalyze() throws
ImpalaException {
+ AnalysisContext ctx = createAnalysisCtx();
+ ctx.getQueryOptions().setEnable_expr_rewrites(true);
+
+ String[] cases = {
+ // NormalizeExprsRule should simplify to FALSE AND ...
+ rewriteTemplate("t.id = 1 AND t.id = 1 AND false"),
+ // ExtractCommonConjunctRule subset should simplify to FALSE AND ...
+ rewriteTemplate("((t.id = 1 AND false) or (t.id = 1 AND false)) AND t.id
= 1"),
+ // ExtractCommonConjunctRule distjunct should simplify to FALSE AND ...
+ rewriteTemplate("((t.id = 1 AND false) or (t.id = 2 AND false)) AND t.id
= 1"),
+ // BetweenToCompoundRule should simplify to FALSE AND ...
+ rewriteTemplate("(1 BETWEEN 2 AND 3) AND t.id = 1 AND t.id = 1"),
+ // SimplifyDistinctFromRule should simplify to FALSE AND ...
+ rewriteTemplate("t.id IS DISTINCT FROM t.id AND t.id = 1")
+ };
+
+ for (String sql : cases) {
+ AnalyzesOk(sql, ctx);
+ }
+ }
+
@Test
public void testCaseWithExpr() throws ImpalaException {
ExprRewriteRule rule = SimplifyConditionalsRule.INSTANCE;
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 014712758..8c27338a9 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -491,6 +491,11 @@ public class PlannerTest extends PlannerTestBase {
runPlannerTestFile("setoperation-rewrite", options);
}
+ @Test
+ public void testRewrite() {
+ runPlannerTestFile("rewrite");
+ }
+
@Test
public void testValues() {
runPlannerTestFile("values");
diff --git
a/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test
b/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test
new file mode 100644
index 000000000..d2e71fa9c
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/rewrite.test
@@ -0,0 +1,129 @@
+# IMPALA-13302: ExtractCommonConjunctRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+ WHERE ((t.id = 1 and false)
+ or (t.id = 1 and false))
+ AND t.id = 1
+ AND t.id = 1),
+ w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+| row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+| | hash predicates: t1.id = t2.id
+| | runtime filters: RF002 <- t2.id
+| | row-size=8B cardinality=6.12K
+| |
+| |--07:SCAN HDFS [functional.alltypes t2]
+| | HDFS partitions=24/24 files=24 size=478.45KB
+| | row-size=4B cardinality=6.12K
+| |
+| 06:SCAN HDFS [functional.alltypes t1]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF002 -> t1.id
+| row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+| hash predicates: t1.id = t2.id
+| runtime filters: RF000 <- t2.id
+| row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ runtime filters: RF000 -> t1.id
+ row-size=4B cardinality=6.12K
+====
+# IMPALA-13203: NormalizeExprsRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+ WHERE t.id = 1
+ AND t.id = 1
+ AND t.id = 1
+ AND false),
+ w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+| row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+| | hash predicates: t1.id = t2.id
+| | runtime filters: RF002 <- t2.id
+| | row-size=8B cardinality=6.12K
+| |
+| |--07:SCAN HDFS [functional.alltypes t2]
+| | HDFS partitions=24/24 files=24 size=478.45KB
+| | row-size=4B cardinality=6.12K
+| |
+| 06:SCAN HDFS [functional.alltypes t1]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF002 -> t1.id
+| row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+| hash predicates: t1.id = t2.id
+| runtime filters: RF000 <- t2.id
+| row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ runtime filters: RF000 -> t1.id
+ row-size=4B cardinality=6.12K
+====
+# IMPALA-13203: NormalizeExprsRule led to non-materialized slot
+WITH v AS (SELECT 1 FROM functional.alltypes t
+ WHERE false
+ AND t.id = 1
+ AND t.id = 1
+ AND t.id = 1),
+ w AS (SELECT 1 FROM functional.alltypes t1 JOIN functional.alltypes t2 ON
t1.id = t2.id)
+SELECT 1 FROM v UNION ALL SELECT 1 FROM v
+UNION ALL SELECT 1 FROM w UNION ALL SELECT 1 FROM w
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+| row-size=1B cardinality=12.25K
+|
+|--08:HASH JOIN [INNER JOIN]
+| | hash predicates: t1.id = t2.id
+| | runtime filters: RF002 <- t2.id
+| | row-size=8B cardinality=6.12K
+| |
+| |--07:SCAN HDFS [functional.alltypes t2]
+| | HDFS partitions=24/24 files=24 size=478.45KB
+| | row-size=4B cardinality=6.12K
+| |
+| 06:SCAN HDFS [functional.alltypes t1]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF002 -> t1.id
+| row-size=4B cardinality=6.12K
+|
+05:HASH JOIN [INNER JOIN]
+| hash predicates: t1.id = t2.id
+| runtime filters: RF000 <- t2.id
+| row-size=8B cardinality=6.12K
+|
+|--04:SCAN HDFS [functional.alltypes t2]
+| HDFS partitions=24/24 files=24 size=478.45KB
+| row-size=4B cardinality=6.12K
+|
+03:SCAN HDFS [functional.alltypes t1]
+ HDFS partitions=24/24 files=24 size=478.45KB
+ runtime filters: RF000 -> t1.id
+ row-size=4B cardinality=6.12K
+====