This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 825e059dc5c1bf325a61b588f1d9615d569ee04b Author: Daniel Becker <[email protected]> AuthorDate: Thu Jan 11 15:18:47 2024 +0100 IMPALA-12695: Crash with UNION with complex types If we unnest an array coming from a UNION ALL, we read invalid memory and in ASAN builds we crash. Example: with v as (select arr1 from complextypes_arrays union all select arr1 from complextypes_arrays) select am.item from v, v.arr1 am; The problem seems to be that in the item tuple of the collections, the item slots are present twice. This is because both the inline view analyzer and the main analyzer add slots with the same path to the tuple. This is possible because - the target tuple is determined based on the path via Path.getRootDesc(), so it will be the same both in the inline view and in the main scope AND - the inline view analyzer and the main one do not share 'slotPathMap_', so the analyzer cannot recognise that a slot for the path has already been added. This commit solves the problem by checking the target tuple whether a slot with the same path already exists in it, and if it does, we reuse that slot. Note, however, that when Analyzer.registerSlotRef() is called with 'duplicateIfCollections=true', a separate slot is added for collections which should not be reused. This commit adds a set, 'duplicateCollectionSlots', in Analyzer.GlobalState to keep track of such collection slots, and these slots are never reused. Note that there is another bug, IMPALA-12753, that a predicate on the collection item in the above query is only enforced on the first child of the union. Therefore this commit disallows placing a predicate on a collection item when the unnested collection comes from a union. Testing: - added test queries in nested-array-in-select-list.test, nested-map-in-select-list.test, zipping-unnest-in-from-clause.test and zipping-unnest-in-select-list.test Change-Id: I340adc50e6d7cda6f59dacd7a46b6adc31635d46 Reviewed-on: http://gerrit.cloudera.org:8080/20953 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 38 +++++- .../org/apache/impala/analysis/InlineViewRef.java | 1 + .../java/org/apache/impala/planner/UnnestNode.java | 60 +++++++++ .../QueryTest/nested-array-in-select-list.test | 137 +++++++++++++++++++++ .../QueryTest/nested-map-in-select-list.test | 37 +++++- .../QueryTest/zipping-unnest-in-from-clause.test | 52 ++++++++ .../QueryTest/zipping-unnest-in-select-list.test | 52 ++++++++ 7 files changed, 373 insertions(+), 4 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 600cc9a8f..8c2194728 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -604,6 +604,10 @@ public class Analyzer { // Shows how many zipping unnests were in the query; public int numZippingUnnests = 0; + // Ids of (collection-typed) SlotDescriptors that were registered with + // 'duplicateIfCollections=true' in 'Analyzer.registerSlotRef()'. + public Set<SlotId> duplicateCollectionSlots = new HashSet<>(); + public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx, AuthorizationFactory authzFactory, AuthorizationContext authzCtx) { this.stmtTableCache = stmtTableCache; @@ -1600,7 +1604,9 @@ public class Analyzer { if (slotPath.destType().isCollectionType() && duplicateIfCollections) { // Register a new slot descriptor for collection types. The BE currently // relies on this behavior for setting unnested collection slots to NULL. - return createAndRegisterRawSlotDesc(slotPath, false); + SlotDescriptor res = createAndRegisterRawSlotDesc(slotPath, false); + globalState_.duplicateCollectionSlots.add(res.getId()); + return res; } // SlotRefs with scalar or struct types are registered against the slot's // fully-qualified lowercase path. @@ -1610,10 +1616,40 @@ public class Analyzer { SlotDescriptor existingSlotDesc = slotPathMap_.get(key); if (existingSlotDesc != null) return existingSlotDesc; + SlotDescriptor existingInTuple = findPathInCurrentTuple(slotPath); + if (existingInTuple != null) return existingInTuple; + return createAndRegisterSlotDesc(slotPath); } } + // It is possible that another Analyzer, for example a child Analyzer in an inline view, + // has already inserted a SlotDescriptor with the current path in the current tuple. But + // because it was a different Analyzer, the path is not present in this Analyzer's + // 'slotPathMap_'. We should reuse the existing SlotDescriptor unless it was explicitly + // added with 'duplicateIfCollections=true' in 'registerSlotRef()'. + // + // Returns the existing SlotDescriptor with the same path in the current tuple if it + // exists or 'null' if it doesn't. + private SlotDescriptor findPathInCurrentTuple(Path slotPath) { + Preconditions.checkNotNull(slotPath); + + TupleDescriptor currentTupleDesc = tupleStack_.peek(); + Preconditions.checkNotNull(currentTupleDesc); + + final List<String> slotPathFQR = slotPath.getFullyQualifiedRawPath(); + Preconditions.checkNotNull(slotPathFQR); + + for (SlotDescriptor slotDesc : currentTupleDesc.getSlots()) { + final List<String> tupleSlotFQR = slotDesc.getPath().getFullyQualifiedRawPath(); + if (slotPathFQR.equals(tupleSlotFQR) && + !globalState_.duplicateCollectionSlots.contains(slotDesc.getId())) { + return slotDesc; + } + } + return null; + } + private SlotDescriptor createAndRegisterSlotDesc(Path slotPath) throws AnalysisException { if (slotPath.destType().isCollectionType()) { diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java index 2a57e6ec8..4ad3073dc 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java @@ -372,6 +372,7 @@ public class InlineViewRef extends TableRef { // We don't recurse deeper and only add the immediate item child to the // substitution map. This is enough both for collections in select list and in // from clause. + // TODO: Revisit for IMPALA-12160. putExprsIntoSmaps(analyzer, itemSlotDesc, srcItemSlotRef, baseTableItemSlotRef, false); } diff --git a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java index 919d849e3..7bebe83ab 100644 --- a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java +++ b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java @@ -17,13 +17,18 @@ package org.apache.impala.planner; +import java.util.ArrayList; import java.util.List; import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.CollectionTableRef; import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.SlotDescriptor; import org.apache.impala.analysis.SlotRef; import org.apache.impala.analysis.ToSqlUtils; +import org.apache.impala.analysis.TupleDescriptor; +import org.apache.impala.analysis.TupleId; +import org.apache.impala.common.AnalysisException; import org.apache.impala.common.ImpalaException; import org.apache.impala.thrift.TExplainLevel; import org.apache.impala.thrift.TPlanNode; @@ -79,6 +84,61 @@ public class UnnestNode extends PlanNode { // Unnest is like a scan and must materialize the slots of its conjuncts. analyzer.materializeSlots(conjuncts_); computeMemLayout(analyzer); + + checkUnnestFromUnionWithPredicate(analyzer); + } + + // Filtering an unnested collection that comes from a UNION [ALL] is not supported, see + // IMPALA-12753. + private void checkUnnestFromUnionWithPredicate(Analyzer analyzer) + throws AnalysisException { + PlanNode subplanInputNode = containingSubplanNode_.getChild(0); + if (!(subplanInputNode instanceof UnionNode)) return; + + UnionNode union = (UnionNode) subplanInputNode; + + // Tuple descriptors of the UNION and their descendants (for complex types). + List<TupleDescriptor> unionDescs = new ArrayList<>(); + for (TupleId tid : union.getTupleIds()) { + TupleDescriptor tuple = analyzer.getDescTbl().getTupleDesc(tid); + getCollTupleDescs(tuple, unionDescs); + } + + for (CollectionTableRef collTblRef : tblRefs_) { + final TupleDescriptor collItemTupleDesc = collTblRef.getDesc(); + + if (!unionDescs.contains(collItemTupleDesc)) continue; + + List<Expr> predicates = analyzer.getConjuncts(); + for (Expr pred : predicates) { + if (!pred.isAuxExpr()) { + List<Expr> matching = new ArrayList(); + pred.collect(expr -> (expr instanceof SlotRef) && + ((SlotRef) expr).getDesc().getParent().equals(collItemTupleDesc), + matching); + if (!matching.isEmpty()) { + throw new AnalysisException("Filtering an unnested collection that comes " + + "from a UNION [ALL] is not supported yet."); + } + } + } + } + } + + // Returns the TupleDescriptors contained by 'tuple' (includes item tuple descs of + // collections). + private void getCollTupleDescs(TupleDescriptor tuple, + List<TupleDescriptor> tupleList) { + tupleList.add(tuple); + for (SlotDescriptor slot : tuple.getSlots()) { + if (slot.getType().isCollectionType()) { + TupleDescriptor itemTuple = slot.getItemTupleDesc(); + Preconditions.checkNotNull(itemTuple); + tupleList.add(itemTuple); + // TODO: Continue recursively (for collections and probably also + // structs) once IMPALA-12751 is solved. + } + } } @Override diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test index 13dae41ea..a874f8878 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-array-in-select-list.test @@ -150,6 +150,143 @@ select 1 from (select int_array from complextypestbl) s tinyint ==== ---- QUERY +# Regression test for IMPALA-12695. +with v as (select arr1 from complextypes_arrays + union all select arr1 from complextypes_arrays) +select am.item from v, v.arr1 am; +---- RESULTS +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +---- TYPES +int +==== +---- QUERY +# Filtering an unnested collection that comes from a UNION [ALL] is not supported, see +# IMPALA-12753. +with v as (select arr1 from complextypes_arrays + union all select arr1 from complextypes_arrays) +select am.item from v, v.arr1 am where am.item is NULL; +---- CATCH +AnalysisException: Filtering an unnested collection that comes from a UNION [ALL] is not supported yet. +==== +---- QUERY +# Regression test for IMPALA-12695 with varlen collection item and an additional inline view layer. +with v0 as (select arr2 from complextypes_arrays + union all select arr2 from complextypes_arrays), + v1 as (select am.item from v0, v0.arr2 am) +select item from v1; +---- RESULTS +'one' +'two' +'three' +'four' +'five' +'one' +'two' +'three' +'NULL' +'five' +'ten' +'ten' +'nine' +'eight' +'ten' +'eleven' +'twelve' +'thirteen' +'str1' +'str2' +'str1' +'str2' +'one' +'two' +'three' +'four' +'five' +'one' +'two' +'three' +'NULL' +'five' +'ten' +'ten' +'nine' +'eight' +'ten' +'eleven' +'twelve' +'thirteen' +'str1' +'str2' +'str1' +'str2' +---- TYPES +string +==== +---- QUERY +# Filtering an unnested collection that comes from a UNION [ALL] is not supported, see +# IMPALA-12753. +with v0 as (select arr2 from complextypes_arrays + union all select arr2 from complextypes_arrays), + v1 as (select am.item from v0, v0.arr2 am) +select item from v1 where item="one"; +---- CATCH +AnalysisException: Filtering an unnested collection that comes from a UNION [ALL] is not supported yet. +==== +---- QUERY +# Filtering an unnested collection that comes from a UNION [ALL] is not supported, see +# IMPALA-12753. The query contains an extra inline view layer above the union. +with v0 as (select arr2 from complextypes_arrays + union all select arr2 from complextypes_arrays), + v1 as (select 1, arr2 from v0), + v2 as (select am.item from v1, v1.arr2 am) +select item from v2 where item="one"; +---- CATCH +AnalysisException: Filtering an unnested collection that comes from a UNION [ALL] is not supported yet. +==== +---- QUERY select id, int_array from (select id, int_array from complextypestbl) s; ---- RESULTS 1,'[1,2,3]' diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test index e21747c08..efec3b7c5 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test @@ -169,9 +169,6 @@ tinyint,string ==== ---- QUERY # Constants in the select list of unions also lead to a "non-pass-through" union. -select 1, int_map, int_map_array from complextypestbl - union all select 2, int_map, int_map_array from complextypestbl; - select 1, map_1d, map_3d from collection_tbl union all select 2, map_1d, map_3d from collection_tbl ---- RESULTS @@ -210,6 +207,40 @@ select 1 from (select int_map from complextypestbl) s tinyint ==== ---- QUERY +# Regression test for IMPALA-12695. +with v0 as (select int_map from complextypestbl + union all select int_map from complextypestbl), + v1 as (select m.key from v0, v0.int_map m) +select key from v1; +---- RESULTS +'k1' +'k1' +'k2' +'k1' +'k2' +'k1' +'k3' +'k1' +'k1' +'k2' +'k1' +'k2' +'k1' +'k3' +---- TYPES +string +==== +---- QUERY +# Filtering an unnested collection that comes from a UNION [ALL] is not supported, see +# IMPALA-12753. +with v0 as (select int_map from complextypestbl + union all select int_map from complextypestbl), + v1 as (select m.key from v0, v0.int_map m) +select key from v1 where key="k1"; +---- CATCH +AnalysisException: Filtering an unnested collection that comes from a UNION [ALL] is not supported yet. +==== +---- QUERY select id, int_map from (select id, int_map from complextypestbl) s; ---- RESULTS 1,'{"k1":1,"k2":100}' diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test index 93b3a02f1..591e36af2 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test +++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-from-clause.test @@ -409,3 +409,55 @@ where t1.id = 3 and t2.id < 3; 3,2,8,'NULL' ---- TYPES INT,INT,INT,STRING +==== +---- QUERY +with v as (select arr1 from complextypes_arrays + union all select arr1 from complextypes_arrays) +select arr1.item from v, unnest(v.arr1); +---- RESULTS +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +---- TYPES +INT diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test index e36c05acf..d471b86ac 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test +++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test @@ -230,3 +230,55 @@ where arr1.item < 3; 10,2,2 ---- TYPES INT,INT,INT +==== +---- QUERY +with v as (select arr1 from complextypes_arrays + union all select arr1 from complextypes_arrays) +select unnest(arr1) a from v +---- RESULTS +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +1 +2 +3 +4 +5 +1 +NULL +3 +4 +5 +10 +9 +8 +10 +10 +NULL +12 +1 +2 +1 +2 +3 +---- TYPES +INT
