This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 20d4ecf845f89011e098e50c789b7763ddc9c4b7 Author: fulili <[email protected]> AuthorDate: Fri Oct 20 11:06:29 2023 +0000 IMPALA-12509: Optimize the backend startup and planner time of large Iceberg table query Remove some objects from TTableDescriptor that the backend does not use for Iceberg tables, which can significantly improve the speed of query planning and backend startup for queries on large Iceberg tables. Change-Id: I46a68f21903c16db3b02681feddb61ddce57879a Reviewed-on: http://gerrit.cloudera.org:8080/20601 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/catalog/FeCatalogUtils.java | 3 ++- .../java/org/apache/impala/catalog/FeIcebergTable.java | 12 ++++++++++-- .../impala/catalog/IcebergPositionDeleteTable.java | 4 +++- .../java/org/apache/impala/catalog/IcebergTable.java | 11 ++++++----- .../apache/impala/catalog/IcebergTimeTravelTable.java | 15 +++++++++------ .../impala/catalog/iceberg/IcebergCtasTarget.java | 10 +++++----- .../apache/impala/catalog/local/LocalIcebergTable.java | 17 +++++++---------- 7 files changed, 42 insertions(+), 30 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java index 6475199b4..50e1adbec 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java @@ -438,7 +438,8 @@ public abstract class FeCatalogUtils { res.setTable_type(TTableType.ICEBERG_TABLE); LocalIcebergTable iceTable = (LocalIcebergTable) table; res.setIceberg_table(FeIcebergTable.Utils.getTIcebergTable(iceTable)); - res.setHdfs_table(iceTable.transfromToTHdfsTable(/*unused*/true)); + res.setHdfs_table(iceTable.transformToTHdfsTable(/*unused*/true, + ThriftObjectType.FULL)); } else if (table instanceof LocalView) { res.setTable_type(TTableType.VIEW); // Metadata of the view are stored in 
msTable. Nothing else need to add here. diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index f23096c40..93449fc90 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -61,6 +61,7 @@ import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.TimeTravelSpec; import org.apache.impala.analysis.TimeTravelSpec.Kind; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.HdfsPartition.FileBlock; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.catalog.iceberg.GroupedContentFiles; @@ -329,7 +330,7 @@ public interface FeIcebergTable extends FeFsTable { return false; } - THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag); + THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, ThriftObjectType type); /** * Returns the current snapshot id of the Iceberg API table if it exists, otherwise @@ -645,6 +646,11 @@ public interface FeIcebergTable extends FeFsTable { } public static TIcebergTable getTIcebergTable(FeIcebergTable icebergTable) { + return getTIcebergTable(icebergTable, ThriftObjectType.FULL); + } + + public static TIcebergTable getTIcebergTable(FeIcebergTable icebergTable, + ThriftObjectType type) { TIcebergTable tIcebergTable = new TIcebergTable(); tIcebergTable.setTable_location(icebergTable.getIcebergTableLocation()); @@ -654,7 +660,9 @@ public interface FeIcebergTable extends FeFsTable { tIcebergTable.setDefault_partition_spec_id( icebergTable.getDefaultPartitionSpecId()); - tIcebergTable.setContent_files(icebergTable.getContentFileStore().toThrift()); + if (type == ThriftObjectType.FULL) { + tIcebergTable.setContent_files(icebergTable.getContentFileStore().toThrift()); + } 
tIcebergTable.setCatalog_snapshot_id(icebergTable.snapshotId()); tIcebergTable.setParquet_compression_codec( diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java index 1fc4aece7..7aad988cc 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java @@ -24,6 +24,7 @@ import java.util.Set; import org.apache.commons.lang.NotImplementedException; import org.apache.iceberg.Table; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.thrift.TColumnStats; @@ -187,7 +188,8 @@ public class IcebergPositionDeleteTable extends VirtualTable implements FeIceber } @Override - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { throw new IllegalStateException("not implemented here"); } diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java index f30883788..c8e2ee618 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java @@ -331,7 +331,7 @@ public class IcebergTable extends Table implements FeIcebergTable { TTable table = super.toThrift(); table.setTable_type(TTableType.ICEBERG_TABLE); table.setIceberg_table(Utils.getTIcebergTable(this)); - table.setHdfs_table(transfromToTHdfsTable(true)); + table.setHdfs_table(transformToTHdfsTable(true, ThriftObjectType.FULL)); return table; } @@ -545,13 +545,14 @@ public class IcebergTable extends Table implements FeIcebergTable { Set<Long> referencedPartitions) { TTableDescriptor desc = new 
TTableDescriptor(tableId, TTableType.ICEBERG_TABLE, getTColumnDescriptors(), numClusteringCols_, name_, db_.getName()); - desc.setIcebergTable(Utils.getTIcebergTable(this)); - desc.setHdfsTable(transfromToTHdfsTable(false)); + desc.setIcebergTable(Utils.getTIcebergTable(this, ThriftObjectType.DESCRIPTOR_ONLY)); + desc.setHdfsTable(transformToTHdfsTable(false, ThriftObjectType.DESCRIPTOR_ONLY)); return desc; } - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { - THdfsTable hdfsTable = hdfsTable_.getTHdfsTable(ThriftObjectType.FULL, null); + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { + THdfsTable hdfsTable = hdfsTable_.getTHdfsTable(type, null); if (updatePartitionFlag) { // Iceberg table only has one THdfsPartition, we set this partition // file format by iceberg file format which depend on table properties diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java index 521ac75fa..7f4d3d011 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java @@ -32,6 +32,7 @@ import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.TableName; import org.apache.impala.analysis.TimeTravelSpec; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.ImpalaRuntimeException; @@ -166,8 +167,8 @@ public class IcebergTimeTravelTable int tableId, Set<Long> referencedPartitions) { TTableDescriptor desc = new TTableDescriptor(tableId, TTableType.ICEBERG_TABLE, getTColumnDescriptors(), 0, getName(), getDb().getName()); - desc.setIcebergTable(Utils.getTIcebergTable(this)); - 
desc.setHdfsTable(transfromToTHdfsTable(false)); + desc.setIcebergTable(Utils.getTIcebergTable(this, ThriftObjectType.DESCRIPTOR_ONLY)); + desc.setHdfsTable(transformToTHdfsTable(false, ThriftObjectType.DESCRIPTOR_ONLY)); return desc; } @@ -192,8 +193,9 @@ public class IcebergTimeTravelTable } @Override - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { - return base_.transfromToTHdfsTable(updatePartitionFlag); + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { + return base_.transformToTHdfsTable(updatePartitionFlag, type); } } @@ -452,8 +454,9 @@ class ForwardingFeIcebergTable implements FeIcebergTable { } @Override - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { - return base.transfromToTHdfsTable(updatePartitionFlag); + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { + return base.transformToTHdfsTable(updatePartitionFlag, type); } @Override diff --git a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java index 67eb50525..e9806ac73 100644 --- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java +++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java @@ -35,7 +35,7 @@ import org.apache.iceberg.types.TypeUtil; import org.apache.impala.analysis.ColumnDef; import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.catalog.CatalogException; -import org.apache.impala.catalog.CatalogObject; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; import org.apache.impala.catalog.Column; import org.apache.impala.catalog.CtasTargetTable; import org.apache.impala.catalog.Db; @@ -49,7 +49,6 @@ import org.apache.impala.catalog.IcebergColumn; import org.apache.impala.catalog.IcebergContentFileStore; import org.apache.impala.catalog.IcebergStructField; import 
org.apache.impala.catalog.IcebergTable; -import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.catalog.local.LocalDb; import org.apache.impala.catalog.local.LocalFsTable; import org.apache.impala.common.ImpalaRuntimeException; @@ -254,7 +253,8 @@ public class IcebergCtasTarget extends CtasTargetTable implements FeIcebergTable } @Override - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { throw new IllegalStateException("not implemented here"); } @@ -290,7 +290,7 @@ public class IcebergCtasTarget extends CtasTargetTable implements FeIcebergTable getNumClusteringCols(), getName(), db_.getName()); - desc.setIcebergTable(Utils.getTIcebergTable(this)); + desc.setIcebergTable(Utils.getTIcebergTable(this, ThriftObjectType.DESCRIPTOR_ONLY)); desc.setHdfsTable(transformToTHdfsTable()); return desc; } @@ -305,7 +305,7 @@ public class IcebergCtasTarget extends CtasTargetTable implements FeIcebergTable private THdfsTable transformOldToTHdfsTable() { THdfsTable hdfsTable = ((HdfsTable)fsTable_).getTHdfsTable( - CatalogObject.ThriftObjectType.FULL, null); + ThriftObjectType.FULL, null); hdfsTable.setPrototype_partition(createPrototypePartition()); return hdfsTable; } diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java index 78dfbd9f0..755055a7e 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java @@ -27,7 +27,7 @@ import java.util.Set; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.impala.analysis.IcebergPartitionSpec; -import org.apache.impala.catalog.CatalogObject; +import org.apache.impala.catalog.CatalogObject.ThriftObjectType; 
import org.apache.impala.catalog.Column; import org.apache.impala.catalog.FeCatalogUtils; import org.apache.impala.catalog.FeFsPartition; @@ -253,17 +253,14 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable { FeCatalogUtils.getTColumnDescriptors(this), getNumClusteringCols(), name_, db_.getName()); - desc.setIcebergTable(Utils.getTIcebergTable(this)); - desc.setHdfsTable(transfromToTHdfsTable()); + desc.setIcebergTable(Utils.getTIcebergTable(this, ThriftObjectType.DESCRIPTOR_ONLY)); + desc.setHdfsTable(transformToTHdfsTable(false, ThriftObjectType.DESCRIPTOR_ONLY)); return desc; } @Override - public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) { - return this.transfromToTHdfsTable(); - } - - private THdfsTable transfromToTHdfsTable() { + public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, + ThriftObjectType type) { Map<Long, THdfsPartition> idToPartition = new HashMap<>(); // LocalFsTable transformed from iceberg table only has one partition Collection<? extends FeFsPartition> partitions = @@ -272,11 +269,11 @@ public class LocalIcebergTable extends LocalTable implements FeIcebergTable { FeFsPartition partition = (FeFsPartition) partitions.toArray()[0]; idToPartition.put(partition.getId(), FeCatalogUtils.fsPartitionToThrift(partition, - CatalogObject.ThriftObjectType.DESCRIPTOR_ONLY)); + ThriftObjectType.DESCRIPTOR_ONLY)); THdfsPartition tPrototypePartition = FeCatalogUtils.fsPartitionToThrift( localFsTable_.createPrototypePartition(), - CatalogObject.ThriftObjectType.DESCRIPTOR_ONLY); + ThriftObjectType.DESCRIPTOR_ONLY); THdfsTable hdfsTable = new THdfsTable(localFsTable_.getHdfsBaseDir(), getColumnNames(), localFsTable_.getNullPartitionKeyValue(), FeFsTable.DEFAULT_NULL_COLUMN_VALUE, idToPartition, tPrototypePartition);
