[ https://issues.apache.org/jira/browse/HUDI-3005?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
waywtdcc updated HUDI-3005:
---------------------------
    Fix Version/s: 0.9.0

> Flink write to Hudi throws Not an Avro data file exception
> -----------------------------------------------------------
>
>                 Key: HUDI-3005
>                 URL: https://issues.apache.org/jira/browse/HUDI-3005
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: flink
>    Affects Versions: 0.9.0
>            Reporter: waywtdcc
>            Priority: Major
>             Fix For: 0.9.0
>
>         Attachments: screenshot-1.png
>
>
> An error is reported when Flink writes to the Hudi table.
> *Code:*
> {code:java}
> CREATE TABLE datagen_test (
>   id BIGINT,
>   name VARCHAR(20),
>   age INT,
>   birthday TIMESTAMP(3),
>   ts TIMESTAMP(3)
> ) WITH (
>   'connector' = 'datagen',
>   'rows-per-second' = '20',
>   'fields.id.min' = '1',
>   'fields.id.max' = '10000'
> );
>
> CREATE TABLE datagen_hudi_test2 (
>   id BIGINT,
>   name STRING,
>   birthday TIMESTAMP(3),
>   ts TIMESTAMP(3),
>   `partition_str` VARCHAR(20),
>   PRIMARY KEY (id) NOT ENFORCED  -- a primary key must be specified
> )
> PARTITIONED BY (`partition_str`)
> WITH (
>   'connector' = 'hudi',
>   'path' = 'hdfs:///user/hive/warehouse/hudi.db/datagen_hudi_test2',
>   'hoodie.datasource.write.recordkey.field' = 'id',  -- primary key field
>   'write.precombine.field' = 'ts',                   -- field used for automatic precombine
>   'write.tasks' = '1',
>   'compaction.tasks' = '1',
>   'write.rate.limit' = '2000',                       -- write rate limit
>   'table.type' = 'MERGE_ON_READ',                    -- default is COPY_ON_WRITE; MERGE_ON_READ is optional
>   'compaction.async.enabled' = 'true',               -- whether to enable async compaction
>   'compaction.trigger.strategy' = 'num_commits',     -- compact by number of commits
>   'compaction.delta_commits' = '5',                  -- default is 5
>   'hive_sync.enable' = 'true',
>   'hive_sync.mode' = 'hms',
>   'hive_sync.metastore.uris' = '***',
>   'hive_sync.table' = 'datagen_hudi_test2_hivesync',
>   'hive_sync.db' = 'hudi',
>   'index.global.enabled' = 'true'
> );
>
> INSERT INTO datagen_hudi_test2
> SELECT id, name, birthday, ts, DATE_FORMAT(birthday, 'yyyyMMdd') AS `partition_str`
> FROM datagen_test;
> {code}
> *Error:*
> {code:java}
> 2021-12-14 10:18:53,554 INFO  org.apache.hudi.common.table.log.HoodieLogFormatWriter [] - HoodieLogFile{pathStr='hdfs:/user/hive/warehouse/hudi.db/datagen_hudi_test2/.hoodie/.commits_.archive.1_1-0-1', fileLen=0} exists. Appending to existing file
> 2021-12-14 10:18:53,880 ERROR org.apache.hudi.table.HoodieTimelineArchiveLog [] - Failed to archive commits, .commit file: 20211210103323.rollback
> org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
>     at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_221]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_221]
>     at java.lang.Thread.run(Thread.java:748) [?:1.8.0_221]
> 2021-12-14 10:18:53,895 ERROR org.apache.hudi.sink.StreamWriteOperatorCoordinator [] - Executor executes action [commits the instant 20211214101738] error
> org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:318) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_221]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_221]
>     at java.lang.Thread.run(Thread.java:748) [?:1.8.0_221]
> Caused by: org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
>     at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     ... 11 more
> 2021-12-14 10:18:53,896 INFO  org.apache.hudi.sink.StreamWriteOperatorCoordinator [] - Executor executes action [taking checkpoint 2] success!
> 2021-12-14 10:18:53,912 INFO  org.apache.flink.runtime.jobmaster.JobMaster [] - Trying to recover from a global failure.
> org.apache.flink.util.FlinkException: Global failure triggered by OperatorCoordinator for 'hoodie_stream_write' (operator 37faf88697f0a69c783562897fa7eaeb).
>     at org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder$LazyInitializedCoordinatorContext.failJob(OperatorCoordinatorHolder.java:492) ~[flink-dist_2.11-1.12.2.jar:1.12.2]
>     at org.apache.hudi.sink.utils.CoordinatorExecutor.exceptionHook(CoordinatorExecutor.java:44) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:76) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_221]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_221]
>     at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_221]
> Caused by: org.apache.hudi.exception.HoodieException: Executor executes action [commits the instant 20211214101738] error
>     ... 5 more
> Caused by: org.apache.hudi.exception.HoodieCommitException: Failed to archive commits
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:318) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     ... 3 more
> Caused by: org.apache.hudi.org.apache.avro.InvalidAvroMagicException: Not an Avro data file
>     at org.apache.hudi.org.apache.avro.file.DataFileReader.openReader(DataFileReader.java:56) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.common.table.timeline.TimelineMetadataUtils.deserializeAvroMetadata(TimelineMetadataUtils.java:178) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.utils.MetadataConversionUtils.createMetaWrapper(MetadataConversionUtils.java:103) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.convertToAvroRecord(HoodieTimelineArchiveLog.java:341) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archive(HoodieTimelineArchiveLog.java:305) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.table.HoodieTimelineArchiveLog.archiveIfRequired(HoodieTimelineArchiveLog.java:128) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.postCommit(HoodieFlinkWriteClient.java:323) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.AbstractHoodieWriteClient.commitStats(AbstractHoodieWriteClient.java:191) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.client.HoodieFlinkWriteClient.commit(HoodieFlinkWriteClient.java:120) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.doCommit(StreamWriteOperatorCoordinator.java:462) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.commitInstant(StreamWriteOperatorCoordinator.java:438) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.StreamWriteOperatorCoordinator.lambda$notifyCheckpointComplete$1(StreamWriteOperatorCoordinator.java:207) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$execute$0(NonThrownExecutor.java:67) ~[hudi-flink-bundle_2.11-0.9.0.jar:0.9.0]
>     ... 3 more
> {code}
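> The trace shows the archiver failing inside TimelineMetadataUtils.deserializeAvroMetadata while it reads the instant file 20211210103323.rollback, which is evidently not a valid Avro data file; a zero-byte metadata file left behind by an interrupted rollback would fail in exactly this way. As a quick check of that hypothesis, here is a minimal diagnostic sketch. This is not Hudi source code: the class name and the direct use of the Hadoop FileSystem API are illustrative assumptions, and the path is the instant named in the log above.
> {code:java}
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.Arrays;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileStatus;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
>
> // Hypothetical diagnostic: checks whether a timeline instant file is a
> // readable Avro data file before the archiver would touch it.
> public class CheckInstantFile {
>   // Avro object container files begin with the 4-byte magic 'O' 'b' 'j' 0x01.
>   private static final byte[] AVRO_MAGIC = {'O', 'b', 'j', 1};
>
>   public static void main(String[] args) throws IOException {
>     // Instant file the archiver failed on, per the log above (illustrative path).
>     Path instant = new Path(
>         "hdfs:///user/hive/warehouse/hudi.db/datagen_hudi_test2/.hoodie/20211210103323.rollback");
>     FileSystem fs = instant.getFileSystem(new Configuration());
>
>     FileStatus status = fs.getFileStatus(instant);
>     if (status.getLen() == 0L) {
>       // A zero-length metadata file is a typical trigger for
>       // InvalidAvroMagicException during timeline archiving.
>       System.out.println("Instant file is empty: " + instant);
>       return;
>     }
>
>     byte[] header = new byte[AVRO_MAGIC.length];
>     try (InputStream in = fs.open(instant)) {
>       boolean valid = in.read(header) == header.length
>           && Arrays.equals(header, AVRO_MAGIC);
>       System.out.println(instant + " starts with Avro magic: " + valid);
>     }
>   }
> }
> {code}
> If the file turns out to be empty or to lack the Avro magic bytes, a workaround often reported for this class of failure is to move the corrupt instant file out of the .hoodie directory so archiving can proceed; having the archiver tolerate or repair such files is what this issue should address.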
--
This message was sent by Atlassian Jira
(v8.20.1#820001)