gfn9cho opened a new issue #954: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hivedb.tableName> table not found URL: https://github.com/apache/incubator-hudi/issues/954 I am using hudi-spark-bundle-0.5.1-SNAPSHOT.jar in EMR and getting the below exception in hiveSync. We are using AWS glue catalog for hive metastore. Hive table is getting created. I could see the table in hive with no data in it. org.apache.hudi.hive.HoodieHiveSyncException: Failed to sync partitions for table <tableName> at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:172) at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:107) at org.apache.hudi.hive.HiveSyncTool.syncHoodieTable(HiveSyncTool.java:67) at org.apache.hudi.HoodieSparkSqlWriter$.syncHive(HoodieSparkSqlWriter.scala:235) at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:169) at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:91) at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) at 
org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668) at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228) ... 69 elided Caused by: org.apache.hudi.org.apache.hadoop_hive.metastore.api.NoSuchObjectException: <hiveDB>.<tableName> table not found at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java) at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java) at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java) at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86) at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:2377) at org.apache.hudi.org.apache.hadoop_hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:2362) at org.apache.hudi.org.apache.hadoop_hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1162) at org.apache.hudi.hive.HoodieHiveClient.scanTablePartitions(HoodieHiveClient.java:240) 
at org.apache.hudi.hive.HiveSyncTool.syncPartitions(HiveSyncTool.java:162) ... 95 more Below is the code, spark-shell --master yarn --deploy-mode client --conf spark.shuffle.spill=true \ --conf spark.scheduler.mode=FIFO \ --conf spark.executor.extraJavaOptions=-XX:MaxPermSize=1024m \ --conf spark.sql.planner.externalSort=true --conf spark.shuffle.manager=sort \ --conf spark.ui.port=8088 --conf spark.executor.memoryOverhead=2g \ --conf spark.rpc.message.maxSize=1024 --conf spark.file.transferTo=false \ --conf spark.driver.maxResultSize=3g --conf spark.rdd.compress=true \ --conf spark.executor.extraJavaOptions="-Dconfig.resource=spark-defaults.conf" \ --conf spark.driver.JavaOptions="-Dspark.yarn.app.container.log.dir=/mnt/var/log/hadoop" \ --conf spark.driver.extraJavaOptions="-Dconfig.file=spark-defaults.conf" \ --conf spark.sql.parquet.writeLegacyFormat=true \ --conf spark.enable.dynamicAllocation=true \ --conf spark.dynamicAllocation.maxExecutors=10 \ --conf spark.dynamicAllocation.minExecutors=1 \ --conf spark.executor.cores=5 \ --conf spark.executor.memory=3g --conf spark.driver.memory=2g \ --conf spark.executor.instances=4 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ --name gwpl_staging_load_hudi \ --files /etc/spark/conf/hive-site.xml \ --properties-file /usr/lib/spark/conf/spark-defaults.conf \ --jars /home/hadoop/hudi/hudi-spark-bundle-0.5.1-SNAPSHOT.jar import org.apache.hudi.DataSourceWriteOptions import org.apache.hudi.config.HoodieWriteConfig import org.apache.spark.sql._ import org.apache.spark.sql.SaveMode._ import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.joda.time.format.DateTimeFormat val stagePrefix="stg_gwpl" val harmonizedStageDB="uat_edf_staging" val harmonizedstagePath="s3://sa-l3-uat-emr-edl-processed/staging" val table="pc_policy" val incrementalData=spark.sql("select * from uat_connect_gwpl_data_processed.pc_policy limit 100").cache incrementalData.write. 
format("org.apache.hudi"). option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY,"ID"). option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "ingestiondt"). option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "UpdateTime"). option(HoodieWriteConfig.TABLE_NAME, stagePrefix + "_hudi_" + table). option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2:hiveserver:10000"). option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive"). option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive"). option("hoodie.datasource.hive_sync.enable", true). option("hoodie.datasource.hive_sync.database",harmonizedStageDB). option("hoodie.datasource.hive_sync.table",stagePrefix + "_hudi_" + table). option("hoodie.datasource.hive_sync.partition_fields","ingestiondt"). mode(SaveMode.Overwrite). save(s"${harmonizedstagePath}/hudi/$table") Please let me know if I can provide more details.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
