Ethan Guo created HUDI-3961:
-------------------------------
Summary: Encounter NoClassDefFoundError when using Spark 3.1 bundle and utilities slim bundle
Key: HUDI-3961
URL: https://issues.apache.org/jira/browse/HUDI-3961
Project: Apache Hudi
Issue Type: Task
Reporter: Ethan Guo
Fix For: 0.11.0
When running deltastreamer with both the Spark 3.1 bundle and the utilities slim bundle
(compiled with the Spark 3.2 profile), the following exception is thrown:
{code:java}
export SPARK_HOME=/Users/ethan/Work/lib/spark-3.1.3-bin-hadoop3.2
export HUDI_SPARK_BUNDLE_JAR=/Users/ethan/Work/lib/hudi_releases/0.11.0-rc3/hudi-spark3.1-bundle_2.12-0.11.0-rc3.jar
export HUDI_UTILITIES_SLIM_JAR=/Users/ethan/Work/lib/hudi_releases/0.11.0-rc3/hudi-utilities-slim-bundle_2.12-0.11.0-rc3.jar
/Users/ethan/Work/lib/spark-3.1.3-bin-hadoop3.2/bin/spark-submit \
--master local[4] \
--driver-memory 4g --executor-memory 2g --num-executors 4 --executor-cores 1 \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
--conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \
--conf spark.sql.catalogImplementation=hive \
--conf spark.driver.maxResultSize=1g \
--conf spark.speculation=true \
--conf spark.speculation.multiplier=1.0 \
--conf spark.speculation.quantile=0.5 \
--conf spark.ui.port=6680 \
--conf spark.eventLog.enabled=true \
--conf spark.eventLog.dir=/Users/ethan/Work/data/hudi/spark-logs \
--packages org.apache.spark:spark-avro_2.12:3.1.3 \
--jars /Users/ethan/Work/repo/hudi-benchmarks/target/hudi-benchmarks-0.1-SNAPSHOT.jar,$HUDI_SPARK_BUNDLE_JAR \
--class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer \
$HUDI_UTILITIES_SLIM_JAR \
--props $TEST_ROOT_DIR/ds_mor.properties \
--source-class BenchmarkDataSource \
--source-ordering-field ts \
--target-base-path $TEST_ROOT_DIR/test_table \
--target-table test_table \
--table-type MERGE_ON_READ \
--op UPSERT \
--continuous{code}
{code:java}
Exception in thread "main" org.apache.hudi.exception.HoodieException: java.lang.NoClassDefFoundError: org/apache/avro/AvroMissingFieldException
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:191)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.sync(HoodieDeltaStreamer.java:186)
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.main(HoodieDeltaStreamer.java:549)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.util.concurrent.ExecutionException: java.lang.NoClassDefFoundError: org/apache/avro/AvroMissingFieldException
at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
at org.apache.hudi.async.HoodieAsyncService.waitForShutdown(HoodieAsyncService.java:103)
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.lambda$sync$1(HoodieDeltaStreamer.java:189)
... 15 more
Caused by: java.lang.NoClassDefFoundError: org/apache/avro/AvroMissingFieldException
at org.apache.hudi.avro.model.HoodieCleanerPlan.newBuilder(HoodieCleanerPlan.java:246)
at org.apache.hudi.table.action.clean.CleanPlanActionExecutor.requestClean(CleanPlanActionExecutor.java:104)
at org.apache.hudi.table.action.clean.CleanPlanActionExecutor.requestClean(CleanPlanActionExecutor.java:141)
at org.apache.hudi.table.action.clean.CleanPlanActionExecutor.execute(CleanPlanActionExecutor.java:166)
at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.scheduleCleaning(HoodieSparkCopyOnWriteTable.java:204)
at org.apache.hudi.client.BaseHoodieWriteClient.scheduleTableServiceInternal(BaseHoodieWriteClient.java:1353)
at org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:864)
at org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:825)
at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.cleanIfNecessary(HoodieBackedTableMetadataWriter.java:1036)
at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:176)
at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:803)
at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:870)
at org.apache.hudi.client.BaseHoodieWriteClient.lambda$writeTableMetadata$0(BaseHoodieWriteClient.java:338)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
at org.apache.hudi.client.BaseHoodieWriteClient.writeTableMetadata(BaseHoodieWriteClient.java:338)
at org.apache.hudi.client.BaseHoodieWriteClient.commit(BaseHoodieWriteClient.java:269)
at org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:234)
at org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:122)
at org.apache.hudi.utilities.deltastreamer.DeltaSync.writeToSink(DeltaSync.java:622)
at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:331)
at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:675)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: org.apache.avro.AvroMissingFieldException
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 25 more {code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)