Voon Hou created HUDI-9380:
------------------------------

             Summary: Fix HoodieTableMetadataUtil#collectColumnRangeMetadata to 
handle null date types
                 Key: HUDI-9380
                 URL: https://issues.apache.org/jira/browse/HUDI-9380
             Project: Apache Hudi
          Issue Type: Bug
            Reporter: Voon Hou
            Assignee: Voon Hou


HoodieTableMetadataUtil#collectColumnRangeMetadata cannot handle cases where 
date-typed column values are null, resulting in a NullPointerException. 

This case can be triggered with the following routine:
{code:java}
test("Create table for comprehensive type testing") {
  withTempDir { tmp =>
    val tableName = "hudi_type_test_mor"
    spark.sql(
      s"""
         |CREATE TABLE $tableName (
         |  uuid STRING,
         |  precombine_field LONG,
         |  col_double DOUBLE,
         |  array_struct ARRAY<STRUCT<inner_f3: TIMESTAMP, inner_f4: STRING>>,
         |  part_col STRING
         |) USING hudi
         | LOCATION '${tmp.getCanonicalPath}'
         | TBLPROPERTIES (
         |  primaryKey = 'uuid',
         |  type = 'mor',
         |  preCombineField = 'precombine_field'
         | )
         | PARTITIONED BY (part_col)
      """.stripMargin)
    // directly write to new parquet file
    spark.sql(s"set hoodie.parquet.small.file.limit=0")
    spark.sql(s"set hoodie.metadata.compact.max.delta.commits=1")
    // partition stats index is enabled together with column stats index
    spark.sql(s"set hoodie.metadata.index.column.stats.enable=true")
    spark.sql(s"set hoodie.metadata.record.index.enable=true")
    spark.sql(s"set hoodie.metadata.index.secondary.enable=true")

    // Insert row 1 into partition 'A'
    spark.sql(
      s"""
         | INSERT INTO $tableName VALUES (
         |  'uuid1', 1000L, 1.1,
         |  array(struct(cast('2023-11-11 11:11:11' as timestamp), 'asd'), 
struct(cast('2023-11-11 11:11:11' as timestamp), 'ghj')),
         |  'A'
         | )
    """.stripMargin)

    spark.sql(s"CREATE INDEX idx_double ON $tableName (col_double)")

    spark.sql(s"select * from $tableName").show(truncate=false)

    // Generate log files through updates on partition 'A'
    spark.sql(s"UPDATE $tableName SET col_double = col_double + 100, 
precombine_field = precombine_field + 1 WHERE part_col = 'A'")
  }
} {code}
Error:
{code:java}
Caused by: org.apache.hudi.exception.HoodieAppendException: Failed while 
appending records to 
file:/private/var/folders/vh/zgs02hf51dn7r08pbl5m2jc00000gn/T/spark-23f1d487-3b77-48df-b923-ffb219a4d835/part_col=B/.48c1060a-3d1d-43af-9e9c-abbfd8cca16d-0_20250506045258975.log.1_0-165-479
    at 
org.apache.hudi.io.HoodieAppendHandle.appendDataAndDeleteBlocks(HoodieAppendHandle.java:497)
    at 
org.apache.hudi.io.HoodieAppendHandle.doAppend(HoodieAppendHandle.java:456)
    at 
org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor.handleUpdate(BaseSparkDeltaCommitActionExecutor.java:83)
    at 
org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:321)
    ... 33 more
Caused by: java.lang.NullPointerException
    at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$null$1(HoodieTableMetadataUtil.java:277)
    at java.util.ArrayList.forEach(ArrayList.java:1259)
    at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$collectColumnRangeMetadata$2(HoodieTableMetadataUtil.java:269)
    at java.util.ArrayList.forEach(ArrayList.java:1259)
    at 
org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRangeMetadata(HoodieTableMetadataUtil.java:266)
    at 
org.apache.hudi.io.HoodieAppendHandle.processAppendResult(HoodieAppendHandle.java:435)
    at 
org.apache.hudi.io.HoodieAppendHandle.appendDataAndDeleteBlocks(HoodieAppendHandle.java:490)
    ... 36 more {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to