[ https://issues.apache.org/jira/browse/HUDI-8601?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Lin Liu updated HUDI-8601:
--------------------------
    Status: In Progress  (was: Open)

> Support LEAF_INDEX block type in HFile
> --------------------------------------
>
>                 Key: HUDI-8601
>                 URL: https://issues.apache.org/jira/browse/HUDI-8601
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Y Ethan Guo
>            Assignee: Lin Liu
>            Priority: Blocker
>             Fix For: 1.0.2
>
>   Original Estimate: 16h
>  Remaining Estimate: 16h
>
> Metadata table reads fail because the LEAF_INDEX block type is not supported in the native HFile reader. This only happens if the HFile is larger than 500MB, presumably because at that size the file's block index becomes multi-level and LEAF_INDEX blocks appear.
> {code:java}
> Driver stacktrace:)
> 17:19:45.068 [main] ERROR org.apache.hudi.async.HoodieAsyncService - Service shutdown with error
> java.util.concurrent.ExecutionException: org.apache.hudi.exception.HoodieException: Failed to instantiate Metadata table
>     at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) ~[?:1.8.0_432]
>     at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) ~[?:1.8.0_432]
>     at org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.ingestion.HoodieIngestionService.startIngestion(HoodieIngestionService.java:65) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.common.util.Option.ifPresent(Option.java:101) [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.HoodieStreamer.sync(HoodieStreamer.java:222) [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.HoodieStreamer.main(HoodieStreamer.java:637) [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_432]
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_432]
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_432]
>     at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_432]
>     at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1029) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129) [spark-core_2.12-3.5.3.jar:3.5.3]
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) [spark-core_2.12-3.5.3.jar:3.5.3]
> Caused by: org.apache.hudi.exception.HoodieException: Failed to instantiate Metadata table
>     at org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:858) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_432]
>     at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
> Caused by: org.apache.hudi.exception.HoodieException: Failed to instantiate Metadata table
>     at org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:308) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.SparkRDDWriteClient.initMetadataTable(SparkRDDWriteClient.java:270) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.lambda$doInitTable$7(BaseHoodieWriteClient.java:1284) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.executeUsingTxnManager(BaseHoodieWriteClient.java:1291) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.doInitTable(BaseHoodieWriteClient.java:1281) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1331) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:128) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:1042) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:897) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_432]
>     at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
> Caused by: org.apache.hudi.exception.HoodieCompactionException: Could not compact s3a://-/2024-11-28-09-43-22-streamer-mor-upsert-async-with-index-emr/test_table/.hoodie/metadata
>     at org.apache.hudi.table.action.compact.RunCompactionActionExecutor.execute(RunCompactionActionExecutor.java:130) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.table.HoodieSparkMergeOnReadTable.compact(HoodieSparkMergeOnReadTable.java:155) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieTableServiceClient.compact(BaseHoodieTableServiceClient.java:311) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieTableServiceClient.lambda$runAnyPendingCompactions$5(BaseHoodieTableServiceClient.java:263) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_432]
>     at org.apache.hudi.client.BaseHoodieTableServiceClient.runAnyPendingCompactions(BaseHoodieTableServiceClient.java:261) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.runAnyPendingCompactions(BaseHoodieWriteClient.java:624) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.runPendingTableServicesOperations(HoodieBackedTableMetadataWriter.java:1544) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.performTableServices(HoodieBackedTableMetadataWriter.java:1509) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:305) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.SparkRDDWriteClient.initMetadataTable(SparkRDDWriteClient.java:270) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.lambda$doInitTable$7(BaseHoodieWriteClient.java:1284) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.executeUsingTxnManager(BaseHoodieWriteClient.java:1291) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.doInitTable(BaseHoodieWriteClient.java:1281) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1331) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:128) ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:1042) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:897) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820) ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1]
>     at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_432]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_432]
>     at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432]
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 31 in stage 8.0 failed 4 times, most recent failure: Lost task 31.3 in stage 8.0 (TID 137) (ip-10-0-94-245.us-west-2.compute.internal executor 4): org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieIOException: unable to read next record from hfile
>     at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:151)
>     at org.apache.hudi.table.HoodieSparkTable.runMerge(HoodieSparkTable.java:147)
>     at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.handleUpdateInternal(HoodieSparkCopyOnWriteTable.java:246)
>     at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.handleUpdate(HoodieSparkCopyOnWriteTable.java:241)
>     at org.apache.hudi.table.action.compact.CompactionExecutionHelper.writeFileAndGetWriteStats(CompactionExecutionHelper.java:63)
>     at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:238)
>     at org.apache.hudi.table.action.compact.HoodieCompactor.lambda$compact$988df80a$1(HoodieCompactor.java:133)
>     at org.apache.spark.api.java.JavaPairRDD$.$anonfun$toScalaFunction$1(JavaPairRDD.scala:1070)
>     at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
>     at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>     at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>     at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>     at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1614)
>     at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1524)
>     at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1588)
>     at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1389)
>     at org.apache.spark.storage.BlockManager.getOrElseUpdateRDDBlock(BlockManager.scala:1343)
>     at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:379)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
>     at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
>     at org.apache.spark.scheduler.Task.run(Task.scala:141)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
>     at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
>     at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750)
> Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieIOException: unable to read next record from hfile
>     at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:75)
>     at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:149)
>     ... 33 more
> Caused by: org.apache.hudi.exception.HoodieIOException: unable to read next record from hfile
>     at org.apache.hudi.io.storage.HoodieNativeAvroHFileReader$RecordIterator.hasNext(HoodieNativeAvroHFileReader.java:353)
>     at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39)
>     at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67)
>     ... 34 more
> Caused by: java.io.IOException: Parsing of the HFile block type LEAF_INDEX is not supported
>     at org.apache.hudi.io.hfile.HFileBlock.parse(HFileBlock.java:132)
>     at org.apache.hudi.io.hfile.HFileBlockReader.nextBlock(HFileBlockReader.java:83)
>     at org.apache.hudi.io.hfile.HFileReaderImpl.instantiateHFileDataBlock(HFileReaderImpl.java:293)
>     at org.apache.hudi.io.hfile.HFileReaderImpl.getKeyValue(HFileReaderImpl.java:225)
>     at org.apache.hudi.io.storage.HoodieNativeAvroHFileReader$RecordIterator.hasNext(HoodieNativeAvroHFileReader.java:350)
>     ... 36 more
> {code}
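
For reference on the mechanism: in the HFile format, a file whose block index outgrows a single root block is written with a multi-level index, in which ROOT_INDEX entries point at INTERMEDIATE_INDEX and LEAF_INDEX blocks instead of directly at DATA blocks. A reader that only handles a single-level index therefore breaks exactly on large files, matching the >500MB symptom above. Below is a minimal, self-contained sketch of the multi-level lookup; every name in it (BlockType, IndexEntry, seekDataBlock, and so on) is an illustrative stand-in, not the actual org.apache.hudi.io.hfile API.

{code:java}
import java.util.List;

// Toy model of a multi-level HFile block index. Shows why a reader that only
// understands ROOT_INDEX -> DATA breaks once LEAF_INDEX levels appear.
public class MultiLevelIndexSketch {

    enum BlockType { ROOT_INDEX, INTERMEDIATE_INDEX, LEAF_INDEX, DATA }

    // One index entry: the first key of the child block and where that block lives.
    record IndexEntry(byte[] firstKey, long childOffset, BlockType childType) {}

    // An index block is just a sorted list of entries.
    record Block(BlockType type, List<IndexEntry> entries) {}

    interface BlockSource {
        Block readBlockAt(long offset);
    }

    /**
     * Returns the offset of the DATA block that may contain {@code key}.
     * For a small file the root entries point straight at DATA blocks, so the
     * loop exits after one lookup. For a large file they point at
     * INTERMEDIATE_INDEX/LEAF_INDEX blocks, and a reader that rejects those
     * block types (as in this ticket) fails on the descent.
     */
    static long seekDataBlock(BlockSource source, Block rootIndex, byte[] key) {
        Block current = rootIndex;
        while (true) {
            IndexEntry entry = floorEntry(current.entries(), key);
            if (entry.childType() == BlockType.DATA) {
                return entry.childOffset(); // reached the bottom of the index
            }
            // Descend one index level; this is the branch the native reader lacked.
            current = source.readBlockAt(entry.childOffset());
        }
    }

    // Binary search for the last entry whose firstKey <= key
    // (falls back to the first entry if key precedes all of them).
    static IndexEntry floorEntry(List<IndexEntry> entries, byte[] key) {
        int lo = 0, hi = entries.size() - 1, ans = 0;
        while (lo <= hi) {
            int mid = (lo + hi) >>> 1;
            if (compare(entries.get(mid).firstKey(), key) <= 0) { ans = mid; lo = mid + 1; }
            else { hi = mid - 1; }
        }
        return entries.get(ans);
    }

    // Lexicographic unsigned byte comparison, since HFile keys are byte-ordered.
    static int compare(byte[] a, byte[] b) {
        for (int i = 0; i < Math.min(a.length, b.length); i++) {
            int c = Byte.compareUnsigned(a[i], b[i]);
            if (c != 0) return c;
        }
        return Integer.compare(a.length, b.length);
    }

    public static void main(String[] args) {
        // Hypothetical two-level layout: root -> one LEAF_INDEX -> two DATA blocks.
        Block leaf = new Block(BlockType.LEAF_INDEX, List.of(
                new IndexEntry("a".getBytes(), 100L, BlockType.DATA),
                new IndexEntry("m".getBytes(), 200L, BlockType.DATA)));
        Block root = new Block(BlockType.ROOT_INDEX, List.of(
                new IndexEntry("a".getBytes(), 10L, BlockType.LEAF_INDEX)));
        BlockSource source = offset -> leaf; // single child block in this toy file
        System.out.println(seekDataBlock(source, root, "p".getBytes())); // prints 200
    }
}
{code}

Presumably the fix implied by the ticket title is to teach the native reader's block parsing and seek path to accept LEAF_INDEX (and INTERMEDIATE_INDEX) blocks and descend through them as sketched above, rather than rejecting them in HFileBlock.parse.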