garyli1019 commented on a change in pull request #2497:
URL: https://github.com/apache/hudi/pull/2497#discussion_r566552361



##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -285,7 +289,14 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
 
       private def mergeRowWithLog(curRow: InternalRow, curKey: String) = {
         val historyAvroRecord = 
serializer.serialize(curRow).asInstanceOf[GenericRecord]
-        
logRecords.get(curKey).getData.combineAndGetUpdateValue(historyAvroRecord, 
tableAvroSchema)
+        if (preCombineField != null) {
+          val payloadProps = new Properties()

Review comment:
       we are creating a new `Properties` in every call, can we put this 
outside?

##########
File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
##########
@@ -108,7 +108,7 @@ public String createTable(
 
     final HoodieTableType tableType = HoodieTableType.valueOf(tableTypeStr);
     HoodieTableMetaClient.initTableType(HoodieCLI.conf, path, tableType, name, 
archiveFolder,
-        payloadClass, layoutVersion);
+        payloadClass, null, layoutVersion);

Review comment:
       can we add a new method `initTableType` to handle all the null being 
added?

##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala
##########
@@ -50,7 +50,8 @@ case class HoodieMergeOnReadTableState(tableStructSchema: 
StructType,
                                        requiredStructSchema: StructType,
                                        tableAvroSchema: String,
                                        requiredAvroSchema: String,
-                                       hoodieRealtimeFileSplits: 
List[HoodieMergeOnReadFileSplit])
+                                       hoodieRealtimeFileSplits: 
List[HoodieMergeOnReadFileSplit],
+                                       preCombineField: String)

Review comment:
       can we make this field `option` instead of using `null`?

##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: 
SQLContext,
   private val tableStructSchema = 
AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = 
getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField
+    if (fieldFromTableConfig != null) {
+      fieldFromTableConfig
+    } else if 
(optParams.contains(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY)) {

Review comment:
       can we use the `HoodieTableConfig` instead? or somehow translate all 
precombine field options into one place and deprecate others. Using the write 
option while reading sounds a bit odd. 

##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: 
SQLContext,
   private val tableStructSchema = 
AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = 
getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField

Review comment:
       `preCombineFieldFromTableConfig` sounds better?

##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -18,6 +18,8 @@
 
 package org.apache.hudi
 
+import java.util.Properties

Review comment:
       nit: this import should be in the next group

##########
File path: 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: 
SQLContext,
   private val tableStructSchema = 
AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = 
getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField
+    if (fieldFromTableConfig != null) {

Review comment:
       If the field does not exist, will this be an empty string or null?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to