Rajesh Balamohan created HIVE-26686: ---------------------------------------
Summary: Iceberg: Having lot of snapshots impacts runtime due to multiple loads of the table Key: HIVE-26686 URL: https://issues.apache.org/jira/browse/HIVE-26686 Project: Hive Issue Type: Improvement Components: HiveServer2 Reporter: Rajesh Balamohan When a large number of snapshots are present in the manifest file, it adversely impacts the runtime of queries (e.g., with a 15-minute trickle feed). Having more snapshots slows down the runtime in 2 additional places. 1. At the time of populating statistics, it tries to load the table details again, i.e., a refresh table invocation. 2. In the Hive metastore hook (HiveIcebergMetaHook::doPreAlterTable), during pre-alter table. We need to check whether the entire table information, along with snapshot details, is needed for this. {noformat} at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeArray(JsonNodeDeserializer.java:437) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:261) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4218) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3251) at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:264) at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:258) at org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$0(BaseMetastoreTableOperations.java:177) at org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$685/0x0000000840e1b440.apply(Unknown Source) at 
org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$1(BaseMetastoreTableOperations.java:191) at org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$686/0x0000000840e1a840.run(Unknown Source) at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:404) at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:214) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:198) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:190) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:191) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:176) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:171) at org.apache.iceberg.hive.HiveTableOperations.doRefresh(HiveTableOperations.java:153) at org.apache.iceberg.BaseMetastoreTableOperations.refresh(BaseMetastoreTableOperations.java:96) at org.apache.iceberg.BaseMetastoreTableOperations.current(BaseMetastoreTableOperations.java:79) at org.apache.iceberg.BaseMetastoreCatalog.loadTable(BaseMetastoreCatalog.java:44) at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:116) at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:106) at org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.getBasicStatistics(HiveIcebergStorageHandler.java:309) at org.apache.hadoop.hive.ql.stats.BasicStatsTask$BasicStatsProcessor.<init>(BasicStatsTask.java:138) at org.apache.hadoop.hive.ql.stats.BasicStatsTask.aggregateStats(BasicStatsTask.java:301) at org.apache.hadoop.hive.ql.stats.BasicStatsTask.process(BasicStatsTask.java:108) at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:107) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) at 
org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:360) at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:333) at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250) at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:111) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:806) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:540) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:534) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166) at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:232) at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:89) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:338) at java.security.AccessController.doPrivileged(java.base@11.0.17/Native Method) at javax.security.auth.Subject.doAs(java.base@11.0.17/Subject.java:423) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:358) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.17/Executors.java:515) at java.util.concurrent.FutureTask.run(java.base@11.0.17/FutureTask.java:264) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.17/Executors.java:515) {noformat} {noformat} at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:258) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeArray(JsonNodeDeserializer.java:437) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.BaseNodeDeserializer.deserializeObject(JsonNodeDeserializer.java:261) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:68) at 
org.apache.hive.iceberg.com.fasterxml.jackson.databind.deser.std.JsonNodeDeserializer.deserialize(JsonNodeDeserializer.java:15) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4218) at org.apache.hive.iceberg.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3251) at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:264) at org.apache.iceberg.TableMetadataParser.read(TableMetadataParser.java:258) at org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$0(BaseMetastoreTableOperations.java:177) at org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$685/0x0000000840e1b440.apply(Unknown Source) at org.apache.iceberg.BaseMetastoreTableOperations.lambda$refreshFromMetadataLocation$1(BaseMetastoreTableOperations.java:191) at org.apache.iceberg.BaseMetastoreTableOperations$$Lambda$686/0x0000000840e1a840.run(Unknown Source) at org.apache.iceberg.util.Tasks$Builder.runTaskWithRetry(Tasks.java:404) at org.apache.iceberg.util.Tasks$Builder.runSingleThreaded(Tasks.java:214) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:198) at org.apache.iceberg.util.Tasks$Builder.run(Tasks.java:190) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:191) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:176) at org.apache.iceberg.BaseMetastoreTableOperations.refreshFromMetadataLocation(BaseMetastoreTableOperations.java:171) at org.apache.iceberg.hive.HiveTableOperations.doRefresh(HiveTableOperations.java:153) at org.apache.iceberg.BaseMetastoreTableOperations.refresh(BaseMetastoreTableOperations.java:96) at org.apache.iceberg.BaseMetastoreTableOperations.current(BaseMetastoreTableOperations.java:79) at org.apache.iceberg.BaseMetastoreCatalog.loadTable(BaseMetastoreCatalog.java:44) at 
org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:116) at org.apache.iceberg.mr.Catalogs.loadTable(Catalogs.java:106) at org.apache.iceberg.mr.hive.IcebergTableUtil.lambda$getTable$1(IcebergTableUtil.java:99) at org.apache.iceberg.mr.hive.IcebergTableUtil$$Lambda$669/0x0000000840e1f840.apply(Unknown Source) at org.apache.iceberg.mr.hive.IcebergTableUtil.getTable(IcebergTableUtil.java:105) at org.apache.iceberg.mr.hive.HiveIcebergMetaHook.doPreAlterTable(HiveIcebergMetaHook.java:323) at org.apache.iceberg.mr.hive.HiveIcebergMetaHook.preAlterTable(HiveIcebergMetaHook.java:313) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.alter_table(HiveMetaStoreClient.java:514) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.alter_table(SessionHiveMetaStoreClient.java:508) at jdk.internal.reflect.GeneratedMethodAccessor233.invoke(Unknown Source) {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)