This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 0918147e6 IMPALA-13270: Fix IllegalStateException on runtime filter
0918147e6 is described below

commit 0918147e69b1625e76886506b065c310c7cc52d1
Author: Michael Smith <[email protected]>
AuthorDate: Fri Aug 2 10:47:58 2024 -0700

    IMPALA-13270: Fix IllegalStateException on runtime filter
    
    IMPALA-12800 improved ExprSubstitutionMap to use a HashMap for lookups.
    Some methods in ExprSubstitutionMap guard against duplicate entries, but
    not creation or adding fields, so cases were added where repeated
    expressions would be added to the map. In practice, only the first entry
    added would be matched. IMPALA-12800 started removing duplicate entries
    from the map to reduce memory use, but missed that one caller -
    RuntimeFilterGenerator - was expecting the map size to exactly match the
    input expression list.
    
    Fixes the IllegalStateException caused by runtime filters where the same
    expression is repeated multiple times by changing the precondition to
    verify that each SlotRef has a mapping added. It doesn't verify the
    final size, because SlotRefs may be repeated and the map will avoid
    adding duplicates.
    
    Removes trim method - added in IMPALA-13270 - as it no longer provides
    any benefit when performing lookups with a HashMap, and may actually do
    more work during the trim. test_query_compilation.py continues to pass,
    and I see no discernible difference in "Single node plan" time; both are
    30-40ms on my machine.
    
    Adds a test case that failed with the old precondition. IDE-assisted
    search did not find any other cases where ExprSubstitutionMap#size is
    compared against a non-zero value.
    
    Change-Id: I23c7bcf33e5185f10a6ae475debb8ab70a2ec5eb
    Reviewed-on: http://gerrit.cloudera.org:8080/21638
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../impala/analysis/ExprSubstitutionMap.java       | 40 ----------------------
 .../impala/planner/RuntimeFilterGenerator.java     |  5 ++-
 .../apache/impala/planner/SingleNodePlanner.java   |  8 ++---
 .../org/apache/impala/planner/PlannerTest.java     |  9 +++++
 .../PlannerTest/runtime-filter-repeated-expr.test  | 31 +++++++++++++++++
 5 files changed, 46 insertions(+), 47 deletions(-)

diff --git 
a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java 
b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
index a68cb193e..0b4e6372f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
@@ -245,44 +245,4 @@ public final class ExprSubstitutionMap {
     }
     return true;
   }
-
-  /**
-   * Remove expressions that are not used according to baseTblSMap
-   */
-  public void trim(ExprSubstitutionMap baseTblSMap, Analyzer analyzer) {
-    Preconditions.checkState(size() == baseTblSMap.size());
-    for (int i = size() - 1; i >= 0; --i) {
-      final Expr baseTblExpr = baseTblSMap.getRhs().get(i);
-      List<SlotId> slotIds = new ArrayList<>();
-      baseTblExpr.getIds(null, slotIds);
-      // Struct children are allowed to be non-materialised because the query 
may only
-      // concern a subset of the fields of the struct. In this case we do not 
remove the
-      // entry from this 'ExprSubstitutionMap', only if none of the children 
are
-      // materialised.
-      if (!baseTblExpr.getType().isStructType()) {
-        for (SlotId id: slotIds) {
-          if (!analyzer.getSlotDesc(id).isMaterialized()) {
-            Expr removed = lhs_.remove(i);
-            substitutions_.remove(removed);
-            rhs_.remove(i);
-            break;
-          }
-        }
-      } else { // If it is a struct, remove iff none of the children are 
materialised.
-        boolean foundMaterialized = false;
-        for (SlotId id: slotIds) {
-          if (analyzer.getSlotDesc(id).isMaterialized()) {
-            foundMaterialized = true;
-            break;
-          }
-        }
-        if (!foundMaterialized) {
-          Expr removed = lhs_.remove(i);
-          substitutions_.remove(removed);
-          rhs_.remove(i);
-        }
-      }
-    }
-    verify();
-  }
 }
diff --git 
a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java 
b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
index 782d68581..d05fe9399 100644
--- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
+++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
@@ -1418,16 +1418,19 @@ public final class RuntimeFilterGenerator {
       targetExpr.collect(SlotRef.class, exprSlots);
       List<SlotId> sids = filter.getTargetSlots().get(targetTid);
       for (SlotRef slotRef: exprSlots) {
+        boolean matched = false;
         for (SlotId sid: sids) {
           if (analyzer.hasValueTransfer(slotRef.getSlotId(), sid)) {
             SlotRef newSlotRef = new SlotRef(analyzer.getSlotDesc(sid));
             newSlotRef.analyzeNoThrow(analyzer);
             smap.put(slotRef, newSlotRef);
+            matched = true;
             break;
           }
         }
+        Preconditions.checkState(matched,
+            "No SlotId found for RuntimeFilter %s SlotRef %s", filter, 
slotRef);
       }
-      Preconditions.checkState(exprSlots.size() == smap.size());
       try {
         targetExpr = targetExpr.substitute(smap, analyzer, false);
       } catch (Exception e) {
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java 
b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index a379e9cf4..fc935b382 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1254,12 +1254,8 @@ public class SingleNodePlanner {
     // of the inline view's plan root. This ensures that all downstream exprs 
referencing
     // the inline view are replaced with exprs referencing the physical output 
of the
     // inline view's plan.
-    ExprSubstitutionMap outputSmap = inlineViewRef.getSmap();
-    if (outputSmap != null && !inlineViewRef.isTableMaskingView()) {
-      outputSmap.trim(inlineViewRef.getBaseTblSmap(), analyzer);
-    }
-    outputSmap = ExprSubstitutionMap.compose(
-        outputSmap, rootNode.getOutputSmap(), analyzer);
+    ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose(
+        inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer);
     if (analyzer.isOuterJoined(inlineViewRef.getId())) {
       // Exprs against non-matched rows of an outer join should always return 
NULL.
       // Make the rhs exprs of the output smap nullable, if necessary. This 
expr wrapping
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java 
b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 16bd69d4e..4bfc93d49 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -1595,4 +1595,13 @@ public class PlannerTest extends PlannerTestBase {
         "location '/'");
     runPlannerTestFile("many-expression", "test_many_expressions");
   }
+
+  /**
+   * Test that runtime filters with the same expression repeated multiple 
times are
+   * planned successfully (IMPALA-13270).
+   */
+  @Test
+  public void testRuntimeFilterRepeatedExpr() {
+    runPlannerTestFile("runtime-filter-repeated-expr", "functional");
+  }
 }
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test
new file mode 100644
index 000000000..7c5ad8a6a
--- /dev/null
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test
@@ -0,0 +1,31 @@
+SELECT alt.int_col, alt.string_col
+FROM alltypes alt
+INNER JOIN (
+  SELECT int_col,
+    concat('20',
+      substring(date_string_col, 7, 2), '-',
+      substring(date_string_col, 4, 2), '-',
+      substring(date_string_col, 1, 2), ' 00:00:00') ts
+  FROM alltypesaggnonulls GROUP BY int_col, date_string_col
+) altann ON altann.ts = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss')
+---- PLAN
+PLAN-ROOT SINK
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: concat('20', substring(date_string_col, 7, 2), '-', 
substring(date_string_col, 4, 2), '-', substring(date_string_col, 1, 2), ' 
00:00:00') = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss')
+|  runtime filters: RF000 <- from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss')
+|  row-size=36B cardinality=9.08K
+|
+|--00:SCAN HDFS [functional.alltypes alt]
+|     HDFS partitions=24/24 files=24 size=478.45KB
+|     row-size=20B cardinality=6.12K
+|
+02:AGGREGATE [FINALIZE]
+|  group by: int_col, date_string_col
+|  row-size=16B cardinality=9.08K
+|
+01:SCAN HDFS [functional.alltypesaggnonulls]
+   HDFS partitions=10/10 files=10 size=744.82KB
+   runtime filters: RF000 -> concat('20', 
substring(functional.alltypesaggnonulls.date_string_col, 7, 2), '-', 
substring(functional.alltypesaggnonulls.date_string_col, 4, 2), '-', 
substring(functional.alltypesaggnonulls.date_string_col, 1, 2), ' 00:00:00')
+   row-size=16B cardinality=9.08K
+====

Reply via email to