santoshsb opened a new issue, #5388:
URL: https://github.com/apache/hudi/issues/5388

   Hi Team,
   
   Environment: AWS
   emr-6.5.0
   Applications: Hive 3.1.2, Tez 0.9.2, Presto 0.261, Spark 3.1.2
   
    I have created a hudi table using spark/spark-session using the following 
code
   `val hudiOptions = Map[String,String](
     HoodieWriteConfig.TABLE_NAME -> "patient_hudi",
     DataSourceWriteOptions.TABLE_TYPE_OPT_KEY -> "COPY_ON_WRITE", 
     DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "id",
     DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "meta.source",
     DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "meta.lastUpdated"
   )
   
   readJson.write
       .format("org.apache.hudi")
       .option(DataSourceWriteOptions.OPERATION_OPT_KEY, 
DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
       .options(hudiOptions)
       .mode(SaveMode.Overwrite)
       .save("s3a://xyz/data/hudi/patient/")`
   
   Now I want to use spark-sql to load the data from this table for 
manipulation. I followed the documentation to create a new table from the existing 
HUDI table and ran the following query
   `spark-sql> create table patient_sql using hudi options (primaryKey = 'id', 
preCombineField = 'meta.lastUpdated') partitioned by (meta.source) location 
'xyz/data/hudi/patient/';`
   
   And getting the following error
   
   Error in query: Cannot partition by nested column: meta.source
   
   Is partitioning by a nested column not supported when using spark-sql, or am I 
missing something here?
   
   And when I remove the partitioned by clause from the query (just to test), the 
following error is thrown:
   
   `java.lang.IllegalArgumentException: Can't find preCombineKey 
`meta.lastUpdated` in root
    |-- _hoodie_commit_time: string (nullable = true)
    |-- _hoodie_commit_seqno: string (nullable = true)
    |-- _hoodie_record_key: string (nullable = true)
    |-- _hoodie_partition_path: string (nullable = true)
    |-- _hoodie_file_name: string (nullable = true)
    |-- address: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- city: string (nullable = true)
    |    |    |-- country: string (nullable = true)
    |    |    |-- extension: array (nullable = true)
    |    |    |    |-- element: struct (containsNull = true)
    |    |    |    |    |-- extension: array (nullable = true)
    |    |    |    |    |    |-- element: struct (containsNull = true)
    |    |    |    |    |    |    |-- url: string (nullable = true)
    |    |    |    |    |    |    |-- valueDecimal: double (nullable = true)
    |    |    |    |    |-- url: string (nullable = true)
    |    |    |-- line: array (nullable = true)
    |    |    |    |-- element: string (containsNull = true)
    |    |    |-- postalCode: string (nullable = true)
    |    |    |-- state: string (nullable = true)
    |-- birthDate: string (nullable = true)
    |-- communication: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- language: struct (nullable = true)
    |    |    |    |-- coding: array (nullable = true)
    |    |    |    |    |-- element: struct (containsNull = true)
    |    |    |    |    |    |-- code: string (nullable = true)
    |    |    |    |    |    |-- display: string (nullable = true)
    |    |    |    |    |    |-- system: string (nullable = true)
    |    |    |    |-- text: string (nullable = true)
    |-- extension: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- extension: array (nullable = true)
    |    |    |    |-- element: struct (containsNull = true)
    |    |    |    |    |-- url: string (nullable = true)
    |    |    |    |    |-- valueCoding: struct (nullable = true)
    |    |    |    |    |    |-- code: string (nullable = true)
    |    |    |    |    |    |-- display: string (nullable = true)
    |    |    |    |    |    |-- system: string (nullable = true)
    |    |    |    |    |-- valueString: string (nullable = true)
    |    |    |-- url: string (nullable = true)
    |    |    |-- valueAddress: struct (nullable = true)
    |    |    |    |-- city: string (nullable = true)
    |    |    |    |-- country: string (nullable = true)
    |    |    |    |-- state: string (nullable = true)
    |    |    |-- valueCode: string (nullable = true)
    |    |    |-- valueDecimal: double (nullable = true)
    |    |    |-- valueString: string (nullable = true)
    |-- gender: string (nullable = true)
    |-- id: string (nullable = true)
    |-- identifier: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- system: string (nullable = true)
    |    |    |-- type: struct (nullable = true)
    |    |    |    |-- coding: array (nullable = true)
    |    |    |    |    |-- element: struct (containsNull = true)
    |    |    |    |    |    |-- code: string (nullable = true)
    |    |    |    |    |    |-- display: string (nullable = true)
    |    |    |    |    |    |-- system: string (nullable = true)
    |    |    |    |-- text: string (nullable = true)
    |    |    |-- value: string (nullable = true)
    |-- managingOrganization: struct (nullable = true)
    |    |-- reference: string (nullable = true)
    |    |-- type: string (nullable = true)
    |-- maritalStatus: struct (nullable = true)
    |    |-- coding: array (nullable = true)
    |    |    |-- element: struct (containsNull = true)
    |    |    |    |-- code: string (nullable = true)
    |    |    |    |-- display: string (nullable = true)
    |    |    |    |-- system: string (nullable = true)
    |    |-- text: string (nullable = true)
    |-- meta: struct (nullable = true)
    |    |-- extension: array (nullable = true)
    |    |    |-- element: struct (containsNull = true)
    |    |    |    |-- url: string (nullable = true)
    |    |    |    |-- valueString: string (nullable = true)
    |    |-- lastUpdated: string (nullable = true)
    |    |-- source: string (nullable = true)
    |    |-- versionId: string (nullable = true)
    |-- multipleBirthBoolean: boolean (nullable = true)
    |-- multipleBirthInteger: long (nullable = true)
    |-- name: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- family: string (nullable = true)
    |    |    |-- given: array (nullable = true)
    |    |    |    |-- element: string (containsNull = true)
    |    |    |-- prefix: array (nullable = true)
    |    |    |    |-- element: string (containsNull = true)
    |    |    |-- use: string (nullable = true)
    |-- resourceType: string (nullable = true)
    |-- telecom: array (nullable = true)
    |    |-- element: struct (containsNull = true)
    |    |    |-- system: string (nullable = true)
    |    |    |-- use: string (nullable = true)
    |    |    |-- value: string (nullable = true)
    |-- text: struct (nullable = true)
    |    |-- div: string (nullable = true)
    |    |-- status: string (nullable = true)
   .
        at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
        at 
org.apache.spark.sql.hudi.HoodieOptionConfig$.validateTable(HoodieOptionConfig.scala:209)
        at 
org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.parseSchemaAndConfigs(HoodieCatalogTable.scala:214)
        at 
org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.initHoodieTable(HoodieCatalogTable.scala:156)
        at 
org.apache.spark.sql.hudi.command.CreateHoodieTableCommand.run(CreateHoodieTableCommand.scala:67)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
        at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
        at 
org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:230)
        at 
org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3751)
        at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
        at 
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
        at 
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
        at 
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
        at 
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3749)
        at org.apache.spark.sql.Dataset.<init>(Dataset.scala:230)
        at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:101)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:98)
        at 
org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:67)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:381)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:500)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1$adapted(SparkSQLCLIDriver.scala:494)
        at scala.collection.Iterator.foreach(Iterator.scala:941)
        at scala.collection.Iterator.foreach$(Iterator.scala:941)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
        at scala.collection.IterableLike.foreach(IterableLike.scala:74)
        at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
        at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:494)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:284)
        at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at 
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:959)
        at 
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
        at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
        at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
        at 
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1047)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1056)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)`
   Thanks for the help.
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to