This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 20d4ecf845f89011e098e50c789b7763ddc9c4b7
Author: fulili <[email protected]>
AuthorDate: Fri Oct 20 11:06:29 2023 +0000

    IMPALA-12509: Optimize the backend startup and planner time of large Iceberg table query
    
    Remove objects from TTableDescriptor that the backend does not use
    for Iceberg tables. This significantly speeds up query planning and
    backend startup for queries on large Iceberg tables.
    
    Change-Id: I46a68f21903c16db3b02681feddb61ddce57879a
    Reviewed-on: http://gerrit.cloudera.org:8080/20601
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/catalog/FeCatalogUtils.java  |  3 ++-
 .../java/org/apache/impala/catalog/FeIcebergTable.java  | 12 ++++++++++--
 .../impala/catalog/IcebergPositionDeleteTable.java      |  4 +++-
 .../java/org/apache/impala/catalog/IcebergTable.java    | 11 ++++++-----
 .../apache/impala/catalog/IcebergTimeTravelTable.java   | 15 +++++++++------
 .../impala/catalog/iceberg/IcebergCtasTarget.java       | 10 +++++-----
 .../apache/impala/catalog/local/LocalIcebergTable.java  | 17 +++++++----------
 7 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java 
b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
index 6475199b4..50e1adbec 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalogUtils.java
@@ -438,7 +438,8 @@ public abstract class FeCatalogUtils {
       res.setTable_type(TTableType.ICEBERG_TABLE);
       LocalIcebergTable iceTable = (LocalIcebergTable) table;
       res.setIceberg_table(FeIcebergTable.Utils.getTIcebergTable(iceTable));
-      res.setHdfs_table(iceTable.transfromToTHdfsTable(/*unused*/true));
+      res.setHdfs_table(iceTable.transformToTHdfsTable(/*unused*/true,
+          ThriftObjectType.FULL));
     } else if (table instanceof LocalView) {
       res.setTable_type(TTableType.VIEW);
       // Metadata of the view are stored in msTable. Nothing else need to add here.
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
index f23096c40..93449fc90 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java
@@ -61,6 +61,7 @@ import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.TimeTravelSpec;
 import org.apache.impala.analysis.TimeTravelSpec.Kind;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.catalog.HdfsPartition.FileBlock;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
 import org.apache.impala.catalog.iceberg.GroupedContentFiles;
@@ -329,7 +330,7 @@ public interface FeIcebergTable extends FeFsTable {
     return false;
   }
 
-  THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag);
+  THdfsTable transformToTHdfsTable(boolean updatePartitionFlag, ThriftObjectType type);
 
   /**
    * Returns the current snapshot id of the Iceberg API table if it exists, otherwise
@@ -645,6 +646,11 @@ public interface FeIcebergTable extends FeFsTable {
     }
 
     public static TIcebergTable getTIcebergTable(FeIcebergTable icebergTable) {
+      return getTIcebergTable(icebergTable, ThriftObjectType.FULL);
+    }
+
+    public static TIcebergTable getTIcebergTable(FeIcebergTable icebergTable,
+        ThriftObjectType type) {
       TIcebergTable tIcebergTable = new TIcebergTable();
       tIcebergTable.setTable_location(icebergTable.getIcebergTableLocation());
 
@@ -654,7 +660,9 @@ public interface FeIcebergTable extends FeFsTable {
       tIcebergTable.setDefault_partition_spec_id(
           icebergTable.getDefaultPartitionSpecId());
 
-      
tIcebergTable.setContent_files(icebergTable.getContentFileStore().toThrift());
+      if (type == ThriftObjectType.FULL) {
+        
tIcebergTable.setContent_files(icebergTable.getContentFileStore().toThrift());
+      }
 
       tIcebergTable.setCatalog_snapshot_id(icebergTable.snapshotId());
       tIcebergTable.setParquet_compression_codec(
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java
index 1fc4aece7..7aad988cc 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergPositionDeleteTable.java
@@ -24,6 +24,7 @@ import java.util.Set;
 
 import org.apache.commons.lang.NotImplementedException;
 import org.apache.iceberg.Table;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
 import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.thrift.TColumnStats;
@@ -187,7 +188,8 @@ public class IcebergPositionDeleteTable extends 
VirtualTable implements FeIceber
   }
 
   @Override
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
     throw new IllegalStateException("not implemented here");
   }
 
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index f30883788..c8e2ee618 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -331,7 +331,7 @@ public class IcebergTable extends Table implements 
FeIcebergTable {
     TTable table = super.toThrift();
     table.setTable_type(TTableType.ICEBERG_TABLE);
     table.setIceberg_table(Utils.getTIcebergTable(this));
-    table.setHdfs_table(transfromToTHdfsTable(true));
+    table.setHdfs_table(transformToTHdfsTable(true, ThriftObjectType.FULL));
     return table;
   }
 
@@ -545,13 +545,14 @@ public class IcebergTable extends Table implements 
FeIcebergTable {
       Set<Long> referencedPartitions) {
     TTableDescriptor desc = new TTableDescriptor(tableId, 
TTableType.ICEBERG_TABLE,
         getTColumnDescriptors(), numClusteringCols_, name_, db_.getName());
-    desc.setIcebergTable(Utils.getTIcebergTable(this));
-    desc.setHdfsTable(transfromToTHdfsTable(false));
+    desc.setIcebergTable(Utils.getTIcebergTable(this, 
ThriftObjectType.DESCRIPTOR_ONLY));
+    desc.setHdfsTable(transformToTHdfsTable(false, 
ThriftObjectType.DESCRIPTOR_ONLY));
     return desc;
   }
 
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
-    THdfsTable hdfsTable = hdfsTable_.getTHdfsTable(ThriftObjectType.FULL, 
null);
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
+    THdfsTable hdfsTable = hdfsTable_.getTHdfsTable(type, null);
     if (updatePartitionFlag) {
       // Iceberg table only has one THdfsPartition, we set this partition
       // file format by iceberg file format which depend on table properties
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java
index 521ac75fa..7f4d3d011 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTimeTravelTable.java
@@ -32,6 +32,7 @@ import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.analysis.TimeTravelSpec;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaRuntimeException;
@@ -166,8 +167,8 @@ public class IcebergTimeTravelTable
       int tableId, Set<Long> referencedPartitions) {
     TTableDescriptor desc = new TTableDescriptor(tableId, 
TTableType.ICEBERG_TABLE,
         getTColumnDescriptors(), 0, getName(), getDb().getName());
-    desc.setIcebergTable(Utils.getTIcebergTable(this));
-    desc.setHdfsTable(transfromToTHdfsTable(false));
+    desc.setIcebergTable(Utils.getTIcebergTable(this, 
ThriftObjectType.DESCRIPTOR_ONLY));
+    desc.setHdfsTable(transformToTHdfsTable(false, 
ThriftObjectType.DESCRIPTOR_ONLY));
     return desc;
   }
 
@@ -192,8 +193,9 @@ public class IcebergTimeTravelTable
   }
 
   @Override
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
-    return base_.transfromToTHdfsTable(updatePartitionFlag);
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
+    return base_.transformToTHdfsTable(updatePartitionFlag, type);
   }
 }
 
@@ -452,8 +454,9 @@ class ForwardingFeIcebergTable implements FeIcebergTable {
   }
 
   @Override
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
-    return base.transfromToTHdfsTable(updatePartitionFlag);
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
+    return base.transformToTHdfsTable(updatePartitionFlag, type);
   }
 
   @Override
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java 
b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
index 67eb50525..e9806ac73 100644
--- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
+++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCtasTarget.java
@@ -35,7 +35,7 @@ import org.apache.iceberg.types.TypeUtil;
 import org.apache.impala.analysis.ColumnDef;
 import org.apache.impala.analysis.IcebergPartitionSpec;
 import org.apache.impala.catalog.CatalogException;
-import org.apache.impala.catalog.CatalogObject;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.CtasTargetTable;
 import org.apache.impala.catalog.Db;
@@ -49,7 +49,6 @@ import org.apache.impala.catalog.IcebergColumn;
 import org.apache.impala.catalog.IcebergContentFileStore;
 import org.apache.impala.catalog.IcebergStructField;
 import org.apache.impala.catalog.IcebergTable;
-import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
 import org.apache.impala.catalog.local.LocalDb;
 import org.apache.impala.catalog.local.LocalFsTable;
 import org.apache.impala.common.ImpalaRuntimeException;
@@ -254,7 +253,8 @@ public class IcebergCtasTarget extends CtasTargetTable 
implements FeIcebergTable
   }
 
   @Override
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
     throw new IllegalStateException("not implemented here");
   }
 
@@ -290,7 +290,7 @@ public class IcebergCtasTarget extends CtasTargetTable 
implements FeIcebergTable
         getNumClusteringCols(),
         getName(), db_.getName());
 
-    desc.setIcebergTable(Utils.getTIcebergTable(this));
+    desc.setIcebergTable(Utils.getTIcebergTable(this, 
ThriftObjectType.DESCRIPTOR_ONLY));
     desc.setHdfsTable(transformToTHdfsTable());
     return desc;
   }
@@ -305,7 +305,7 @@ public class IcebergCtasTarget extends CtasTargetTable 
implements FeIcebergTable
 
   private THdfsTable transformOldToTHdfsTable() {
     THdfsTable hdfsTable = ((HdfsTable)fsTable_).getTHdfsTable(
-        CatalogObject.ThriftObjectType.FULL, null);
+        ThriftObjectType.FULL, null);
     hdfsTable.setPrototype_partition(createPrototypePartition());
     return hdfsTable;
   }
diff --git 
a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
index 78dfbd9f0..755055a7e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIcebergTable.java
@@ -27,7 +27,7 @@ import java.util.Set;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.impala.analysis.IcebergPartitionSpec;
-import org.apache.impala.catalog.CatalogObject;
+import org.apache.impala.catalog.CatalogObject.ThriftObjectType;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.FeCatalogUtils;
 import org.apache.impala.catalog.FeFsPartition;
@@ -253,17 +253,14 @@ public class LocalIcebergTable extends LocalTable 
implements FeIcebergTable {
         FeCatalogUtils.getTColumnDescriptors(this),
         getNumClusteringCols(),
         name_, db_.getName());
-    desc.setIcebergTable(Utils.getTIcebergTable(this));
-    desc.setHdfsTable(transfromToTHdfsTable());
+    desc.setIcebergTable(Utils.getTIcebergTable(this, 
ThriftObjectType.DESCRIPTOR_ONLY));
+    desc.setHdfsTable(transformToTHdfsTable(false, 
ThriftObjectType.DESCRIPTOR_ONLY));
     return desc;
   }
 
   @Override
-  public THdfsTable transfromToTHdfsTable(boolean updatePartitionFlag) {
-    return this.transfromToTHdfsTable();
-  }
-
-  private THdfsTable transfromToTHdfsTable() {
+  public THdfsTable transformToTHdfsTable(boolean updatePartitionFlag,
+      ThriftObjectType type) {
     Map<Long, THdfsPartition> idToPartition = new HashMap<>();
     // LocalFsTable transformed from iceberg table only has one partition
     Collection<? extends FeFsPartition> partitions =
@@ -272,11 +269,11 @@ public class LocalIcebergTable extends LocalTable 
implements FeIcebergTable {
     FeFsPartition partition = (FeFsPartition) partitions.toArray()[0];
     idToPartition.put(partition.getId(),
         FeCatalogUtils.fsPartitionToThrift(partition,
-            CatalogObject.ThriftObjectType.DESCRIPTOR_ONLY));
+            ThriftObjectType.DESCRIPTOR_ONLY));
 
     THdfsPartition tPrototypePartition = FeCatalogUtils.fsPartitionToThrift(
         localFsTable_.createPrototypePartition(),
-        CatalogObject.ThriftObjectType.DESCRIPTOR_ONLY);
+        ThriftObjectType.DESCRIPTOR_ONLY);
     THdfsTable hdfsTable = new THdfsTable(localFsTable_.getHdfsBaseDir(),
         getColumnNames(), localFsTable_.getNullPartitionKeyValue(),
         FeFsTable.DEFAULT_NULL_COLUMN_VALUE, idToPartition, 
tPrototypePartition);

Reply via email to