This is an automated email from the ASF dual-hosted git repository. laszlog pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 74617537b5c805327349ef0ac5c79b84dc1e786d Author: Tamas Mate <[email protected]> AuthorDate: Thu Jan 11 14:26:54 2024 +0100 IMPALA-12706: Fix nested struct querying for Iceberg metadata tables This commit fixes a DCHECK failure when querying a struct inside a struct. The previous field accessor creation logic was trying to find the ColumnDescriptor for a struct inside a struct and hit a DCHECK because there are no ColumnDescriptors for struct fields. The logic has been reworked to only use ColumnDescriptors for top level columns. Testing: - Added E2E test to cover this case Change-Id: Iadd029a4edc500bd8d8fca3f958903c2dbe09e8e Reviewed-on: http://gerrit.cloudera.org:8080/20883 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../iceberg-metadata/iceberg-metadata-scan-node.cc | 33 ++++++++++------------ .../iceberg-metadata/iceberg-metadata-scan-node.h | 8 +++--- .../queries/QueryTest/iceberg-metadata-tables.test | 10 +++++++ 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc index d779992fb..5210b7cd1 100644 --- a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc +++ b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.cc @@ -91,30 +91,27 @@ Status IcebergMetadataScanNode::CreateFieldAccessors() { JNIEnv* env = JniUtil::GetJNIEnv(); if (env == nullptr) return Status("Failed to get/create JVM"); for (SlotDescriptor* slot_desc: tuple_desc_->slots()) { - if (slot_desc->type().IsStructType()) { - // Get the top level struct's field id from the ColumnDescriptor then recursively - // get the field ids for struct fields - int field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id(); - RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id())); - RETURN_IF_ERROR(CreateFieldAccessors(env, slot_desc)); - } else if (slot_desc->col_path().size() > 1) { - DCHECK(!slot_desc->type().IsComplexType()); - // Slot that is child of a struct without tuple, can occur when a struct member is - // in the select list. ColumnType has a tree structure, and this loop finds the - // STRUCT node that stores the primitive type. Because, that struct node has the - // field id list of its childs. + int field_id = -1; + if (slot_desc->col_path().size() == 1) { + // Top level slots have ColumnDescriptors that store the field ids. + field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id(); + } else { + // Non top level slots are fields of a nested type. This code path is to handle + // slots that does not have their nested type's tuple available. + // This loop finds the struct ColumnType node that stores the slot as it has the + // field id list of its children. int root_type_index = slot_desc->col_path()[0]; ColumnType* current_type = &const_cast<ColumnType&>( tuple_desc_->table_desc()->col_descs()[root_type_index].type()); for (int i = 1; i < slot_desc->col_path().size() - 1; ++i) { current_type = ¤t_type->children[slot_desc->col_path()[i]]; } - int field_id = current_type->field_ids[slot_desc->col_path().back()]; - RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id())); - } else { - // For primitives in the top level tuple, use the ColumnDescriptor - int field_id = tuple_desc_->table_desc()->GetColumnDesc(slot_desc).field_id(); - RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id())); + field_id = current_type->field_ids[slot_desc->col_path().back()]; + } + DCHECK_NE(field_id, -1); + RETURN_IF_ERROR(AddAccessorForFieldId(env, field_id, slot_desc->id())); + if (slot_desc->type().IsStructType()) { + RETURN_IF_ERROR(CreateFieldAccessors(env, slot_desc)); } } return Status::OK(); diff --git a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h index 64f1d3aa5..ae15efbd4 100644 --- a/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h +++ b/be/src/exec/iceberg-metadata/iceberg-metadata-scan-node.h @@ -121,10 +121,10 @@ class IcebergMetadataScanNode : public ScanNode { Status GetCatalogTable(jobject* jtable); /// Populates the jaccessors_ map by creating the accessors for the columns in the JVM. - /// To create a field accessor for a column the Iceberg field id is needed. For - /// primitive type columns that are not a field of a struct, this can be found in the - /// ColumnDescriptor. However, ColumnDescriptors are not available for struct fields, - /// in this case the SlotDescriptor can be used. + /// To create a field accessor for a column the Iceberg field id is needed. For columns + /// that are not a field of a struct, this can be found in the ColumnDescriptor. + /// However, ColumnDescriptors are not available for struct fields, in this case the + /// ColumnType of the SlotDescriptor can be used. Status CreateFieldAccessors(); /// Recursive part of the Accessor collection, when there is a struct in the tuple. diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test index 0f58dae99..a559e13b6 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test @@ -470,6 +470,16 @@ select readable_metrics from functional_parquet.iceberg_query_metadata.entries; STRING ==== ---- QUERY +select readable_metrics.i from functional_parquet.iceberg_query_metadata.entries; +---- RESULTS +'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":3}' +'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":2,"upper_bound":2}' +'{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":1}' +'{"column_size":null,"value_count":null,"null_value_count":null,"nan_value_count":null,"lower_bound":null,"upper_bound":null}' +---- TYPES +STRING +==== +---- QUERY select snapshot_id, readable_metrics from functional_parquet.iceberg_query_metadata.entries; ---- RESULTS row_regex:[1-9]\d*|0,'{"i":{"column_size":47,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":3}}'
