[ https://issues.apache.org/jira/browse/HUDI-8744?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sagar Sumit updated HUDI-8744: ------------------------------ Status: Patch Available (was: In Progress) > Improve expression index config experience > ------------------------------------------ > > Key: HUDI-8744 > URL: https://issues.apache.org/jira/browse/HUDI-8744 > Project: Apache Hudi > Issue Type: Sub-task > Reporter: Y Ethan Guo > Assignee: Sagar Sumit > Priority: Blocker > Fix For: 1.0.1 > > Original Estimate: 2h > Remaining Estimate: 2h > > When running Hudi streamer with the following config and no expression index > created from SQL, the Hudi streamer failed after some time. > {code:java} > hoodie.write.record.merge.mode=COMMIT_TIME_ORDERING > # Key fields, for kafka example > hoodie.datasource.write.recordkey.field=key > hoodie.datasource.write.partitionpath.field=partition > # Schema provider props (change to absolute path based on your installation) > hoodie.deltastreamer.schemaprovider.source.schema.file=s3a://hudi-benchmark-source/benchmark_schema_city.avsc > hoodie.deltastreamer.schemaprovider.target.schema.file=s3a://hudi-benchmark-source/benchmark_schema_city.avsc > # DFS Source > hoodie.deltastreamer.source.dfs.root=s3a://hudi-benchmark-source/upserts-20p-0.2update-last10-100r > benchmark.input.source.path=s3a://hudi-benchmark-source/upserts-20p-0.2update-last10-100r > # Compaction > hoodie.compact.inline.max.delta.commits=3 > # Clean and archive > hoodie.clean.async=true > hoodie.keep.max.commits=7 > hoodie.keep.min.commits=5 > hoodie.cleaner.commits.retained=4 > # Concurrency control > hoodie.write.concurrency.mode=optimistic_concurrency_control > hoodie.cleaner.policy.failed.writes=LAZY > hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider > # Metadata table > hoodie.metadata.enable=true > hoodie.metadata.index.bloom.filter.enable=true > hoodie.metadata.index.bloom.filter.file.group.count=10 > hoodie.metadata.index.column.stats.enable=true > hoodie.metadata.index.column.stats.file.group.count=10 > 
hoodie.metadata.record.index.enable=true > hoodie.metadata.index.functional.enable=true > hoodie.metadata.index.partition.stats.enable=true > hoodie.metadata.index.secondary.enable=true > hoodie.metadata.index.secondary.column=city > 10:09:28.881 [pool-30-thread-1] ERROR > org.apache.hudi.utilities.streamer.HoodieStreamer - Shutting down delta-sync > due to exception > org.apache.hudi.exception.HoodieException: Error waiting for async clean > service to finish > at > org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:76) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93) > ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > 
[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_432] > at java.lang.Thread.run(Thread.java:750) [?:1.8.0_432] > Caused by: java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieMetadataException: Functional index metadata > not found > at > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > ~[?:1.8.0_432] > at > java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) > ~[?:1.8.0_432] > at > org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:74) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > ... 12 more > Caused by: org.apache.hudi.exception.HoodieMetadataException: Functional > index metadata not found > at > org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToFunctionalIndexRecords(HoodieTableMetadataUtil.java:613) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToRecords(HoodieTableMetadataUtil.java:577) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$update$39(HoodieBackedTableMetadataWriter.java:1223) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:993) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1223) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > 
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:105) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:234) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:199) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:270) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:269) > ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieTableServiceClient.clean(BaseHoodieTableServiceClient.java:808) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:863) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:836) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:54) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > ... 
4 more > 10:09:38.620 [main] ERROR org.apache.hudi.async.HoodieAsyncService - Service > shutdown with error > java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieException: Error waiting for async clean > service to finish > at > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > ~[?:1.8.0_432] > at > java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) > ~[?:1.8.0_432] > at > org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.ingestion.HoodieIngestionService.startIngestion(HoodieIngestionService.java:65) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at org.apache.hudi.common.util.Option.ifPresent(Option.java:101) > [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.HoodieStreamer.sync(HoodieStreamer.java:222) > [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.HoodieStreamer.main(HoodieStreamer.java:637) > [hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > ~[?:1.8.0_432] > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > ~[?:1.8.0_432] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_432] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_432] > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > [spark-core_2.12-3.5.3.jar:3.5.3] > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1029) > [spark-core_2.12-3.5.3.jar:3.5.3] > at > org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194) > [spark-core_2.12-3.5.3.jar:3.5.3] > at 
org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217) > [spark-core_2.12-3.5.3.jar:3.5.3] > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91) > [spark-core_2.12-3.5.3.jar:3.5.3] > at > org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120) > [spark-core_2.12-3.5.3.jar:3.5.3] > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129) > [spark-core_2.12-3.5.3.jar:3.5.3] > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > [spark-core_2.12-3.5.3.jar:3.5.3] > Caused by: org.apache.hudi.exception.HoodieException: Error waiting for async > clean service to finish > at > org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:858) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_432] > at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432] > Caused by: org.apache.hudi.exception.HoodieException: Error waiting for async > clean service to finish > at > org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:76) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > 
org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93) > ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_432] > at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432] > Caused by: java.util.concurrent.ExecutionException: > org.apache.hudi.exception.HoodieMetadataException: Functional index metadata > not found > at > java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) > ~[?:1.8.0_432] > at > java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) > ~[?:1.8.0_432] > at > org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:102) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.async.AsyncCleanerService.waitForCompletion(AsyncCleanerService.java:74) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieTableServiceClient.asyncClean(BaseHoodieTableServiceClient.java:144) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > 
org.apache.hudi.client.BaseHoodieWriteClient.autoCleanOnCommit(BaseHoodieWriteClient.java:596) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.mayBeCleanAndArchive(BaseHoodieWriteClient.java:581) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:258) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:93) > ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:948) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:520) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:820) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_432] > at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432] > Caused by: org.apache.hudi.exception.HoodieMetadataException: Functional > index metadata not found > at > org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToFunctionalIndexRecords(HoodieTableMetadataUtil.java:613) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieTableMetadataUtil.convertMetadataToRecords(HoodieTableMetadataUtil.java:577) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.lambda$update$39(HoodieBackedTableMetadataWriter.java:1223) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:993) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1223) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:105) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:234) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:199) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:270) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:269) > ~[hudi-spark3.5-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieTableServiceClient.clean(BaseHoodieTableServiceClient.java:808) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:863) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:836) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:54) > ~[hudi-utilities-slim-bundle_2.12-1.0.0-rc1.jar:1.0.0-rc1] > at > 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_432] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_432] > at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_432] > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)