This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.4.1 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5eb3187b3ac6447a6a80bed6d022dca1dd910760 Author: Michael Smith <[email protected]> AuthorDate: Fri Aug 2 10:47:58 2024 -0700 IMPALA-13270: Fix IllegalStateException on runtime filter IMPALA-12800 improved ExprSubstitutionMap to use a HashMap for lookups. Some methods in ExprSubstitutionMap guard against duplicate entries, but not creation or adding fields, so cases were added where repeated expressions would be added to the map. In practice, only the first entry added would be matched. IMPALA-12800 started removing duplicate entries from the map to reduce memory use, but missed that one caller - RuntimeFilterGenerator - was expecting the map size to exactly match the input expression list. Fixes the IllegalStateException caused by runtime filters where the same expression is repeated multiple times by changing the precondition to verify that each SlotRef has a mapping added. It doesn't verify the final size, because SlotRefs may be repeated and the map will avoid adding duplicates. Removes trim method - added in an earlier change - as it no longer provides any benefit when performing lookups with a HashMap, and may actually do more work during the trim. test_query_compilation.py continues to pass, and I see no discernible difference in "Single node plan" time; both are 30-40ms on my machine. Adds a test case that failed with the old precondition. IDE-assisted search did not find any other cases where ExprSubstitutionMap#size is compared against a non-zero value. 
Change-Id: I23c7bcf33e5185f10a6ae475debb8ab70a2ec5eb Reviewed-on: http://gerrit.cloudera.org:8080/21638 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../impala/analysis/ExprSubstitutionMap.java | 40 ---------------------- .../impala/planner/RuntimeFilterGenerator.java | 5 ++- .../apache/impala/planner/SingleNodePlanner.java | 8 ++--- .../org/apache/impala/planner/PlannerTest.java | 9 +++++ .../PlannerTest/runtime-filter-repeated-expr.test | 31 +++++++++++++++++ 5 files changed, 46 insertions(+), 47 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java index a68cb193e..0b4e6372f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java +++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java @@ -245,44 +245,4 @@ public final class ExprSubstitutionMap { } return true; } - - /** - * Remove expressions that are not used according to baseTblSMap - */ - public void trim(ExprSubstitutionMap baseTblSMap, Analyzer analyzer) { - Preconditions.checkState(size() == baseTblSMap.size()); - for (int i = size() - 1; i >= 0; --i) { - final Expr baseTblExpr = baseTblSMap.getRhs().get(i); - List<SlotId> slotIds = new ArrayList<>(); - baseTblExpr.getIds(null, slotIds); - // Struct children are allowed to be non-materialised because the query may only - // concern a subset of the fields of the struct. In this case we do not remove the - // entry from this 'ExprSubstitutionMap', only if none of the children are - // materialised. - if (!baseTblExpr.getType().isStructType()) { - for (SlotId id: slotIds) { - if (!analyzer.getSlotDesc(id).isMaterialized()) { - Expr removed = lhs_.remove(i); - substitutions_.remove(removed); - rhs_.remove(i); - break; - } - } - } else { // If it is a struct, remove iff none of the children are materialised. 
- boolean foundMaterialized = false; - for (SlotId id: slotIds) { - if (analyzer.getSlotDesc(id).isMaterialized()) { - foundMaterialized = true; - break; - } - } - if (!foundMaterialized) { - Expr removed = lhs_.remove(i); - substitutions_.remove(removed); - rhs_.remove(i); - } - } - } - verify(); - } } diff --git a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java index 782d68581..d05fe9399 100644 --- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java +++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java @@ -1418,16 +1418,19 @@ public final class RuntimeFilterGenerator { targetExpr.collect(SlotRef.class, exprSlots); List<SlotId> sids = filter.getTargetSlots().get(targetTid); for (SlotRef slotRef: exprSlots) { + boolean matched = false; for (SlotId sid: sids) { if (analyzer.hasValueTransfer(slotRef.getSlotId(), sid)) { SlotRef newSlotRef = new SlotRef(analyzer.getSlotDesc(sid)); newSlotRef.analyzeNoThrow(analyzer); smap.put(slotRef, newSlotRef); + matched = true; break; } } + Preconditions.checkState(matched, + "No SlotId found for RuntimeFilter %s SlotRef %s", filter, slotRef); } - Preconditions.checkState(exprSlots.size() == smap.size()); try { targetExpr = targetExpr.substitute(smap, analyzer, false); } catch (Exception e) { diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index a379e9cf4..fc935b382 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -1254,12 +1254,8 @@ public class SingleNodePlanner { // of the inline view's plan root. This ensures that all downstream exprs referencing // the inline view are replaced with exprs referencing the physical output of the // inline view's plan. 
- ExprSubstitutionMap outputSmap = inlineViewRef.getSmap(); - if (outputSmap != null && !inlineViewRef.isTableMaskingView()) { - outputSmap.trim(inlineViewRef.getBaseTblSmap(), analyzer); - } - outputSmap = ExprSubstitutionMap.compose( - outputSmap, rootNode.getOutputSmap(), analyzer); + ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose( + inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer); if (analyzer.isOuterJoined(inlineViewRef.getId())) { // Exprs against non-matched rows of an outer join should always return NULL. // Make the rhs exprs of the output smap nullable, if necessary. This expr wrapping diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 7af466f9f..16dbe3029 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -1567,4 +1567,13 @@ public class PlannerTest extends PlannerTestBase { "location '/'"); runPlannerTestFile("many-expression", "test_many_expressions"); } + + /** + * Test that runtime filters with the same expression repeated multiple times are + * planned successfully (IMPALA-13270). 
+ */ + @Test + public void testRuntimeFilterRepeatedExpr() { + runPlannerTestFile("runtime-filter-repeated-expr", "functional"); + } } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test new file mode 100644 index 000000000..7c5ad8a6a --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-repeated-expr.test @@ -0,0 +1,31 @@ +SELECT alt.int_col, alt.string_col +FROM alltypes alt +INNER JOIN ( + SELECT int_col, + concat('20', + substring(date_string_col, 7, 2), '-', + substring(date_string_col, 4, 2), '-', + substring(date_string_col, 1, 2), ' 00:00:00') ts + FROM alltypesaggnonulls GROUP BY int_col, date_string_col +) altann ON altann.ts = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [INNER JOIN] +| hash predicates: concat('20', substring(date_string_col, 7, 2), '-', substring(date_string_col, 4, 2), '-', substring(date_string_col, 1, 2), ' 00:00:00') = from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +| runtime filters: RF000 <- from_unixtime(alt.id, 'yyyy-MM-dd HH:mm:ss') +| row-size=36B cardinality=9.08K +| +|--00:SCAN HDFS [functional.alltypes alt] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=20B cardinality=6.12K +| +02:AGGREGATE [FINALIZE] +| group by: int_col, date_string_col +| row-size=16B cardinality=9.08K +| +01:SCAN HDFS [functional.alltypesaggnonulls] + HDFS partitions=10/10 files=10 size=744.82KB + runtime filters: RF000 -> concat('20', substring(functional.alltypesaggnonulls.date_string_col, 7, 2), '-', substring(functional.alltypesaggnonulls.date_string_col, 4, 2), '-', substring(functional.alltypesaggnonulls.date_string_col, 1, 2), ' 00:00:00') + row-size=16B cardinality=9.08K +====
