bithw1 opened a new issue, #12009:
URL: https://github.com/apache/hudi/issues/12009

   Hi,
   I am using 0.15.0. I am running the following code snippet in the 
spark-shell, trying to save a Spark DataFrame as a Hudi table.
   
   When I run the code, an exception is thrown; the full stack trace is pasted 
below. The main problem is: `Caused by: java.lang.IllegalArgumentException: 
Partition-path field has to be non-empty!`. However, I have set the 
partitionField, so I am not sure where the problem is.
   
   ```
   import org.apache.hudi.DataSourceWriteOptions
   import org.apache.hudi.DataSourceWriteOptions._
   import org.apache.hudi.config.HoodieWriteConfig
   import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
   import org.apache.hudi.hive.MultiPartKeysValueExtractor
   import org.apache.hudi.keygen.{ComplexKeyGenerator,SimpleKeyGenerator}
   import org.apache.spark.sql.SaveMode.{Append, Overwrite}
   import org.apache.spark.sql.hudi.command.UuidKeyGenerator
   import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
   import spark.implicits._
   
     def save2HudiSyncHiveWithPrimaryKey(df: DataFrame, databaseName: String, 
tableName: String, primaryKey: String, preCombineField: String,
                                         partitionField: String, operation: 
String, mode: SaveMode): Unit = {
        println("partitionField:" + partitionField)
       println("PARTITIONPATH_FIELD.key:" + PARTITIONPATH_FIELD.key)    
       df.
         write.format("hudi").
         option(RECORDKEY_FIELD.key, primaryKey). 
         option(PRECOMBINE_FIELD.key, preCombineField).
         option(PARTITIONPATH_FIELD.key, partitionField).
         option(TBL_NAME.key, tableName).
         option(KEYGENERATOR_CLASS_NAME.key(), 
classOf[SimpleKeyGenerator].getName). 
         option(OPERATION.key(), operation).
         option(DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key, 
s"primaryKey=$primaryKey").
         mode(mode)
         .saveAsTable(databaseName + "." + tableName )
     }
     
     
     
       val df = Seq((1, "a1", 10, 1000, "2022-05-12")).toDF("id", "name", 
"value", "ts", "dt")
       val databaseName = "default"
       val tableName1 = "test_hudi_table_7"
       val primaryKey = "id"
       val preCombineField = "ts"
       val partitionField = "dt"
       save2HudiSyncHiveWithPrimaryKey(df, databaseName, tableName1, 
primaryKey, preCombineField, partitionField,
       UPSERT_OPERATION_OPT_VAL, Overwrite)
   
   ```
   
   The full exception is:
   
   ```
   scala>     save2HudiSyncHiveWithPrimaryKey(df, databaseName, tableName1, 
primaryKey, preCombineField, partitionField,
        |     UPSERT_OPERATION_OPT_VAL, Overwrite)
   partitionField:dt
   PARTITIONPATH_FIELD.key:hoodie.datasource.write.partitionpath.field
   24/09/26 10:56:13 WARN TableSchemaResolver: Could not find any data file 
written for commit, so could not get schema for table 
hdfs://hadoop.master:9000/user/hive/warehouse/test_hudi_table_7
   org.apache.hudi.exception.HoodieException: Unable to instantiate class 
org.apache.hudi.keygen.SimpleKeyGenerator
     at 
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:75)
     at 
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:123)
     at 
org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:94)
     at 
org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory.createKeyGenerator(HoodieSparkKeyGeneratorFactory.java:83)
     at 
org.apache.hudi.HoodieSparkSqlWriterInternal.writeInternal(HoodieSparkSqlWriter.scala:264)
     at 
org.apache.hudi.HoodieSparkSqlWriterInternal.write(HoodieSparkSqlWriter.scala:187)
     at 
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:125)
     at 
org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand$.run(InsertIntoHoodieTableCommand.scala:100)
     at 
org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand.run(CreateHoodieTableAsSelectCommand.scala:106)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
     at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
     at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
     at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
     at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
     at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
     at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
     at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
     at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
     at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
     at 
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
     at 
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
     at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
     at 
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
     at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
     at 
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
     at 
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:116)
     at 
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
     at 
org.apache.spark.sql.DataFrameWriter.createTable(DataFrameWriter.scala:701)
     at 
org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:679)
     at 
org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:573)
     at save2HudiSyncHiveWithPrimaryKey(<console>:209)
     ... 96 elided
   Caused by: java.lang.reflect.InvocationTargetException: 
java.lang.IllegalArgumentException: Partition-path field has to be non-empty!
     at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
     at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
     at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
     at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
     at 
org.apache.hudi.common.util.ReflectionUtils.loadClass(ReflectionUtils.java:73)
     ... 133 more
   Caused by: java.lang.IllegalArgumentException: Partition-path field has to 
be non-empty!
     at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:42)
     at 
org.apache.hudi.keygen.SimpleKeyGenerator.validatePartitionPath(SimpleKeyGenerator.java:114)
     at 
org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:54)
     at 
org.apache.hudi.keygen.SimpleKeyGenerator.<init>(SimpleKeyGenerator.java:42)
     ... 138 more
   
   
   
   ```
   
   
   
   
   
   
   
   
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to