[
https://issues.apache.org/jira/browse/HUDI-3708?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Raymond Xu updated HUDI-3708:
-----------------------------
Sprint: Hudi-Sprint-Mar-22
> Upsert to metadata table fails due to schema change
> ---------------------------------------------------
>
> Key: HUDI-3708
> URL: https://issues.apache.org/jira/browse/HUDI-3708
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Ethan Guo
> Priority: Blocker
> Fix For: 0.11.0
>
>
> Scenario: Deltastreamer in continuous mode, COW table, single writer with async
> clustering and cleaning. Only the files partition is enabled in the metadata table.
> The table was written before the metadata schema change (adding "columnName" to
> HoodieMetadataColumnStats). When using the new writer with the new schema, the
> upsert to the metadata table fails the schema compatibility check.
> {code:java}
> 22/03/23 23:11:38 WARN CleanActionExecutor: Failed to perform previous clean
> operation, instant: [==>20220314172020474__clean__INFLIGHT]
> org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema
> compatibility check.
> at
> org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:729)
> at
> org.apache.hudi.client.SparkRDDWriteClient.upsertPreppedRecords(SparkRDDWriteClient.java:169)
> at
> org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:154)
> at
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:670)
> at
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:694)
> at
> org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$1(BaseActionExecutor.java:69)
> at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
> at
> org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:69)
> at
> org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:211)
> at
> org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:176)
> at
> org.apache.hudi.table.action.clean.CleanActionExecutor.lambda$execute$6(CleanActionExecutor.java:238)
> at java.util.ArrayList.forEach(ArrayList.java:1259)
> at
> org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:232)
> at
> org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:339)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:781)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:738)
> at
> org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:55)
> at
> java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieException: Failed schema
> compatibility check for writerSchema
> :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A
> record saved within the Metadata
> Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type
> of the metadata
> record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size
> of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file
> has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains
> information about partitions and files within the
> dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data
> file bloom filter
> details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom
> filter type
> code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant
> timestamp when this metadata was
> created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter
> binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter
> entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all
> data files in the user
> table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data
> file column
> statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File
> name for which this column statistics
> applies"},{"name":"columnName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Column
> name for which this column statistics
> applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum
> value in the range. Based on user data table schema, we can convert this to
> appropriate
> type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum
> value in the range. Based on user data table schema, we can convert it to
> appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total
> count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total
> count of null
> values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size
> on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total
> uncompressed storage size on
> disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry
> valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all
> data files in the user table","default":null}]}, table schema
> :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A
> record saved within the Metadata
> Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type
> of the metadata
> record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size
> of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file
> has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains
> information about partitions and files within the
> dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data
> file bloom filter
> details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom
> filter type
> code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant
> timestamp when this metadata was
> created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter
> binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter
> entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all
> data files in the user
> table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data
> file column
> statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File
> name for which this column statistics
> applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum
> value in the range. Based on user data table schema, we can convert this to
> appropriate
> type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum
> value in the range. Based on user data table schema, we can convert it to
> appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total
> count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total
> count of null
> values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size
> on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total
> uncompressed storage size on
> disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry
> valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all
> data files in the user table","default":null}]}, base path
> :file:/Users/ethan/Work/scripts/mt_rollout_testing/deploy_b_single_writer_async_services/b3_ds_cow_010mt_011mt_conf_fix2/test_table/.hoodie/metadata
> at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:721)
> at
> org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:727)
> ... 20 more {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)