This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.4.1 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 53a0b89647cc109fc030f7111ea33337e02f702b Author: Daniel Becker <[email protected]> AuthorDate: Mon Aug 5 12:39:19 2024 +0200 IMPALA-13272: Analytic function of collections can lead to crash The following query leads to DCHECK in debug builds and may cause more subtle issues in RELEASE builds: select row_no from ( select arr.small, row_number() over (order by arr.inner_struct1.str) as row_no from functional_parquet.collection_struct_mix t, t.arr_contains_nested_struct arr ) res; The problem is in AnalyticPlanner.createSortInfo(). Because it is an array unnesting operation, there are two tuples from which we try to add slot descriptors to the sorting tuple: the array item tuple (which we'll need) and the main tuple (which we don't actually need). The main tuple contains the slot desc for the array. It is marked as materialised, so we add it to the sorting tuple, but its child 'small' is not materialised, which leads to the error. This change solves the problem by only adding slot descs to the sorting tuple if they are fully materialised, i.e. they and all their children recursively are also materialised. Testing: - added test queries in sort-complex.test. Change-Id: I71d1fa28ad4ff2e1a8fc5b91d3fc271c33765190 Reviewed-on: http://gerrit.cloudera.org:8080/21643 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/Analyzer.java | 2 +- .../org/apache/impala/analysis/SlotDescriptor.java | 36 ++++++++++--- .../java/org/apache/impala/analysis/SortInfo.java | 2 +- .../apache/impala/analysis/TupleDescriptor.java | 2 +- .../org/apache/impala/planner/AnalyticPlanner.java | 11 +++- .../queries/QueryTest/sort-complex.test | 60 ++++++++++++++++++++++ 6 files changed, 102 insertions(+), 11 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 9dd55bea6..3796f32c5 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -1792,7 +1792,7 @@ public class Analyzer { Preconditions.checkState(collTblRef.getCollectionExpr() instanceof SlotRef); SlotDescriptor desc = ((SlotRef) collTblRef.getCollectionExpr()).getDesc(); - desc.setIsMaterializedRecursively(true); + desc.setShouldMaterializeRecursively(true); try (TupleStackGuard guard = new TupleStackGuard(desc.getItemTupleDesc())) { if (slotPath.destType().isArrayType()) { diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java index 7f2496636..5d42503ad 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java @@ -66,7 +66,7 @@ public class SlotDescriptor { private boolean isMaterialized_ = false; // If true, then computeMemLayout() should be recursively called for itemTupleDesc_. - private boolean materializeRecursively_ = false; + private boolean shouldMaterializeRecursively_ = false; // if false, this slot cannot be NULL // Note: it is still possible that a SlotRef pointing to this descriptor could have a @@ -139,15 +139,39 @@ public class SlotDescriptor { isMaterialized_ = value; LOG.trace("Mark slot(sid={}) of tuple(tid={}) as {}materialized", id_, parent_.getId(), isMaterialized_ ? "" : "non-"); - if (materializeRecursively_) { + if (shouldMaterializeRecursively_) { Preconditions.checkState(itemTupleDesc_ != null); itemTupleDesc_.materializeSlots(); } } - public boolean isMaterializedRecursively() { return materializeRecursively_; } - public void setIsMaterializedRecursively(boolean value) { - materializeRecursively_ = value; + public boolean shouldMaterializeRecursively() { + return shouldMaterializeRecursively_; } + + public void setShouldMaterializeRecursively(boolean value) { + shouldMaterializeRecursively_ = value; + } + + /** + * Returns true iff this SlotDescriptor and all its children (if any) are materialized, + * recursively. + * This is relevant in case of complex types, for example when only a subset of the + * fields of a struct is queried - in this case the queried fields are materialized, the + * other fields are not materialized, but the struct itself also needs to be marked as + * materialized because otherwise no memory layout can be calculated for the fields. + */ + public boolean isFullyMaterialized() { + if (!isMaterialized()) return false; + if (itemTupleDesc_ != null) { + Preconditions.checkState(type_.isComplexType()); + for (SlotDescriptor childSlotDesc : itemTupleDesc_.getSlots()) { + if (!childSlotDesc.isFullyMaterialized()) return false; + } + } + + return true; + } + public boolean getIsNullable() { return isNullable_; } public void setIsNullable(boolean value) { isNullable_ = value; } public int getByteSize() { return byteSize_; } @@ -487,7 +511,7 @@ public class SlotDescriptor { .add("label", label_) .add("type", typeStr) .add("materialized", isMaterialized_) - .add("materializeRecursively", materializeRecursively_) + .add("shouldMaterializeRecursively", shouldMaterializeRecursively_) .add("byteSize", byteSize_) .add("byteOffset", byteOffset_) .add("nullable", isNullable_) diff --git a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java index b15eabbbb..2b7c6c00e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java +++ b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java @@ -276,7 +276,7 @@ public class SortInfo { Preconditions.checkNotNull(null); } } else if (dstType.isCollectionType()) { - dstSlotDesc.setIsMaterializedRecursively(true); + dstSlotDesc.setShouldMaterializeRecursively(true); } outputSmap_.put(srcExpr.clone(), dstExpr); materializedExprs_.add(srcExpr); diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java index 5d81d40c3..cbcd4f6ca 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java @@ -441,7 +441,7 @@ public class TupleDescriptor { nullIndicatorByte = nullIndicators.first; nullIndicatorBit = nullIndicators.second; } - if (d.getType().isCollectionType() && d.isMaterializedRecursively()) { + if (d.getType().isCollectionType() && d.shouldMaterializeRecursively()) { d.getItemTupleDesc().computeMemLayout(); } } diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java index 104d40a83..12f3c854f 100644 --- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java @@ -320,7 +320,7 @@ public class AnalyticPlanner { return createSortInfo(input, sortExprs, isAsc, nullsFirst, TSortingOrder.LEXICAL); } - /** + /** * Same as above, but with extra parameter, sorting order. */ private SortInfo createSortInfo(PlanNode input, List<Expr> sortExprs, @@ -329,7 +329,14 @@ public class AnalyticPlanner { for (TupleId tid: input.getTupleIds()) { TupleDescriptor tupleDesc = analyzer_.getTupleDesc(tid); for (SlotDescriptor inputSlotDesc: tupleDesc.getSlots()) { - if (inputSlotDesc.isMaterialized()) { + // Only add fully materialized slot descriptors. It is possible that a slot desc + // shows up that is materialized but not all of its children are, see + // IMPALA-13272. This can happen for example if only a subset of the fields of a + // struct is queried - the overall struct needs to be marked materialized to allow + // the required children to be materialized where they are needed, but the struct + // as a whole does not need to be added to the sorting tuple (or any other tuple) + // - the required fields are added separately in their own right. + if (inputSlotDesc.isFullyMaterialized()) { // Project out collection slots that are not supported in the sorting tuple // (collections within structs). if (SortInfo.isValidInSortingTuple(inputSlotDesc.getType())) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test index 0693c5b24..68b84bbd1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test +++ b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test @@ -151,6 +151,66 @@ select id, arr_contains_nested_struct from collection_struct_mix order by id; INT,STRING ==== ---- QUERY +# Regression test for # IMPALA-13272 +select + row_no +from ( + select + arr.small, + row_number() over (order by arr.inner_struct1.str) as row_no + from collection_struct_mix t, t.arr_contains_nested_struct arr +) res; +---- RESULTS +1 +2 +3 +4 +5 +6 +---- TYPES +BIGINT +==== +---- QUERY +# Regression test for # IMPALA-13272 +select + row_no, str +from ( + select + arr.small, + arr.inner_struct1.str str, + row_number() over (order by arr.inner_struct1.str) as row_no + from collection_struct_mix t, t.arr_contains_nested_struct arr +) res; +---- RESULTS +1,'' +2,'a few soju distilleries' +3,'NULL' +4,'NULL' +5,'NULL' +6,'NULL' +---- TYPES +BIGINT,STRING +==== +---- QUERY +# Regression test for # IMPALA-13272 +select + row_no, str, small +from ( + select + arr.small, + arr.inner_struct2.str str, + row_number() over (order by arr.inner_struct2.str) as row_no + from collection_struct_mix t, t.arr_contains_nested_struct arr +) res limit 4; +---- RESULTS +1,'four spaceship captains',2 +2,'lots of soju distilleries',105 +3,'more spaceship captains',20 +4,'very few distilleries',104 +---- TYPES +BIGINT,STRING, SMALLINT +==== +---- QUERY # Sorting is not supported yet for collections within structs: IMPALA-12160. Test with a # struct that contains an array. select id, struct_contains_arr from collection_struct_mix order by id
