This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.4.1 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5eb3187b3ac6447a6a80bed6d022dca1dd910760 Author: Michael Smith <[email protected]> AuthorDate: Fri Aug 2 10:47:58 2024 -0700 IMPALA-13270: Fix IllegalStateException on runtime filter IMPALA-12800 improved ExprSubstitutionMap to use a HashMap for lookups. Some methods in ExprSubstitutionMap guard against duplicate entries, but not creation or adding fields, so cases were added where repeated expressions would be added to the map. In practice, only the first entry added would be matched. IMPALA-12800 started removing duplicate entries from the map to reduce memory use, but missed that one caller - RuntimeFilterGenerator - was expecting the map size to exactly match the input expression list. Fixes the IllegalStateException caused by runtime filters where the same expression is repeated multiple times by changing the precondition to verify that each SlotRef has a mapping added. It doesn't verify the final size, because SlotRefs may be repeated and the map will avoid adding duplicates. Removes trim method - added in an earlier change - as it no longer provides any benefit when performing lookups with a HashMap, and may actually do more work during the trim. test_query_compilation.py continues to pass, and I see no discernible difference in "Single node plan" time; both are 30-40ms on my machine. Adds a test case that failed with the old precondition. IDE-assisted search did not find any other cases where ExprSubstitutionMap#size is compared against a non-zero value. 
Change-Id: I23c7bcf33e5185f10a6ae475debb8ab70a2ec5eb Reviewed-on: http://gerrit.cloudera.org:8080/21638 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../impala/analysis/ExprSubstitutionMap.java | 40 ---------------------- .../impala/planner/RuntimeFilterGenerator.java | 5 ++- .../apache/impala/planner/SingleNodePlanner.java | 8 ++--- .../org/apache/impala/planner/PlannerTest.java | 9 +++++ .../PlannerTest/runtime-filter-repeated-expr.test | 31 +++++++++++++++++ 5 files changed, 46 insertions(+), 47 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java index a68cb193e..0b4e6372f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java +++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java @@ -245,44 +245,4 @@ public final class ExprSubstitutionMap { } return true; } - - /** - * Remove expressions that are not used according to baseTblSMap - */ - public void trim(ExprSubstitutionMap baseTblSMap, Analyzer analyzer) { - Preconditions.checkState(size() == baseTblSMap.size()); - for (int i = size() - 1; i >= 0; --i) { - final Expr baseTblExpr = baseTblSMap.getRhs().get(i); - List<SlotId> slotIds = new ArrayList<>(); - baseTblExpr.getIds(null, slotIds); - // Struct children are allowed to be non-materialised because the query may only - // concern a subset of the fields of the struct. In this case we do not remove the - // entry from this 'ExprSubstitutionMap', only if none of the children are - // materialised. - if (!baseTblExpr.getType().isStructType()) { - for (SlotId id: slotIds) { - if (!analyzer.getSlotDesc(id).isMaterialized()) { - Expr removed = lhs_.remove(i); - substitutions_.remove(removed); - rhs_.remove(i); - break; - } - } - } else { // If it is a struct, remove iff none of the children are materialised. 
- boolean foundMaterialized = false; - for (SlotId id: slotIds) { - if (analyzer.getSlotDesc(id).isMaterialized()) { - foundMaterialized = true; - break; - } - } - if (!foundMaterialized) { - Expr removed = lhs_.remove(i); - substitutions_.remove(removed); - rhs_.remove(i); - } - } - } - verify(); - } } diff --git a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java index 782d68581..d05fe9399 100644 --- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java +++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java @@ -1418,16 +1418,19 @@ public final class RuntimeFilterGenerator { targetExpr.collect(SlotRef.class, exprSlots); List<SlotId> sids = filter.getTargetSlots().get(targetTid); for (SlotRef slotRef: exprSlots) { + boolean matched = false; for (SlotId sid: sids) { if (analyzer.hasValueTransfer(slotRef.getSlotId(), sid)) { SlotRef newSlotRef = new SlotRef(analyzer.getSlotDesc(sid)); newSlotRef.analyzeNoThrow(analyzer); smap.put(slotRef, newSlotRef); + matched = true; break; } } + Preconditions.checkState(matched, + "No SlotId found for RuntimeFilter %s SlotRef %s", filter, slotRef); } - Preconditions.checkState(exprSlots.size() == smap.size()); try { targetExpr = targetExpr.substitute(smap, analyzer, false); } catch (Exception e) { diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index a379e9cf4..fc935b382 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -1254,12 +1254,8 @@ public class SingleNodePlanner { // of the inline view's plan root. This ensures that all downstream exprs referencing // the inline view are replaced with exprs referencing the physical output of the // inline view's plan. 
- ExprSubstitutionMap outputSmap = inlineViewRef.getSmap(); - if (outputSmap != null && !inlineViewRef.isTableMaskingView()) { - outputSmap.trim(inlineViewRef.getBaseTblSmap(), analyzer); - } - outputSmap = ExprSubstitutionMap.compose( - outputSmap, rootNode.getOutputSmap(), analyzer); + ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose( + inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer); if (analyzer.isOuterJoined(inlineViewRef.getId())) { // Exprs against non-matched rows of an outer join should always return NULL. // Make the rhs exprs of the output smap nullable, if necessary. This expr wrapping diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 7af466f9f..16dbe3029 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1567,4 +1567,13 @@ public class PlannerTest extends PlannerTestBase { "location '/'"); runPlannerTestFile("many-expression", "test_many_expressions"); } + + /** + * Test that runtime filters with the same expression repeated multiple times are + * planned successfully (IMPALA-13270). 
+ */ + @Test + public void testRuntimeFilterRepeatedExpr() { + runPlannerTestFile("runtime-filter-repeated-expr", "functional"); + } } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test new file mode 100644 index 000000000..7c5ad8a6a --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test @@ -0,0 +1,31 @@ +SELECT alt.int_col, alt.string_col +FROM alltypes alt +INNER JOIN ( + SELECT int_col, + concat('20', + substring(date_string_col, 7, 2), '-', + substring(date_string_col, 4, 2), '-', + substring(date_string_col, 1, 2), ' 00:00:00') ts + FROM alltypesaggnonulls GROUP BY int_col, date_string_col +) altann ON altann.ts = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [INNER JOIN] +| hash predicates: concat('20', substring(date_string_col, 7, 2), '-', substring(date_string_col, 4, 2), '-', substring(date_string_col, 1, 2), ' 00:00:00') = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +| runtime filters: RF000 <- from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +| row-size=36B cardinality=9.08K +| +|--00:SCAN HDFS [functional.alltypes alt] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=20B cardinality=6.12K +| +02:AGGREGATE [FINALIZE] +| group by: int_col, date_string_col +| row-size=16B cardinality=9.08K +| +01:SCAN HDFS [functional.alltypesaggnonulls] + HDFS partitions=10/10 files=10 size=744.82KB + runtime filters: RF000 -> concat('20', substring(functional.alltypesaggnonulls.date_string_col, 7, 2), '-', substring(functional.alltypesaggnonulls.date_string_col, 4, 2), '-', substring(functional.alltypesaggnonulls.date_string_col, 1, 2), ' 00:00:00') + row-size=16B cardinality=9.08K +====
