[
https://issues.apache.org/jira/browse/HUDI-6753?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17761294#comment-17761294
]
Lokesh Jain commented on HUDI-6753:
-----------------------------------
The stack trace is always seen for this test, even when the test passes.
It shows that the file schema and the requested schema differ while reading
parquet: the file schema is the one written to the parquet file's footer,
and the requested schema is the one supplied at read time.
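For context, a minimal sketch of where each schema comes from (the path is hypothetical; the calls are plain parquet-mr, matching the parquet-hadoop 1.12.2 seen in the trace):
{code:java}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.schema.MessageType;

public class PrintFileSchema {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical path; with inline reading this would be an inlinefs:// URI
    // pointing into a log block, as in the stack trace below.
    Path path = new Path("/tmp/some-base-or-inline-file.parquet");

    // The "file schema" is whatever the writer recorded in the parquet footer.
    try (ParquetFileReader reader =
             ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
      MessageType fileSchema = reader.getFooter().getFileMetaData().getSchema();
      System.out.println(fileSchema);
    }
    // The "requested schema" is supplied by the reader instead, e.g. an
    // Avro-derived projection set via
    // AvroReadSupport.setRequestedProjection(conf, avroSchema).
  }
}
{code}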
File schema:
{code:java}
message spark_schema {
  optional binary _hoodie_commit_time (STRING);
  optional binary _hoodie_commit_seqno (STRING);
  optional binary _hoodie_record_key (STRING);
  optional binary _hoodie_partition_path (STRING);
  optional binary _hoodie_file_name (STRING);
  required int64 timestamp;
  required binary _row_key (STRING);
  required binary partition_path (STRING);
  required binary trip_type (STRING);
  required binary rider (STRING);
  required binary driver (STRING);
  required double begin_lat;
  required double begin_lon;
  required double end_lat;
  required double end_lon;
  required int32 distance_in_meters;
  required int64 seconds_since_epoch;
  required float weight;
  required binary nation;
  required int32 current_date (DATE);
  required int64 current_ts;
  required int64 height (DECIMAL(10,6));
  required group city_to_state (MAP) {
    repeated group key_value {
      required binary key (STRING);
      required binary value (STRING);
    }
  }
  required group fare {
    required double amount;
    required binary currency (STRING);
  }
  required group tip_history (LIST) {
    repeated group list {
      required group element {
        required double amount;
        required binary currency (STRING);
      }
    }
  }
  required boolean _hoodie_is_deleted;
  required double haversine_distance;
}
{code}
Requested schema:
{code:java}
message triprec {
  optional binary _hoodie_commit_time (STRING);
  optional binary _hoodie_commit_seqno (STRING);
  optional binary _hoodie_record_key (STRING);
  optional binary _hoodie_partition_path (STRING);
  optional binary _hoodie_file_name (STRING);
  required int64 timestamp;
  required binary _row_key (STRING);
  required binary partition_path (STRING);
  required binary trip_type (STRING);
  required binary rider (STRING);
  required binary driver (STRING);
  required double begin_lat;
  required double begin_lon;
  required double end_lat;
  required double end_lon;
  required int32 distance_in_meters;
  required int64 seconds_since_epoch;
  required float weight;
  required binary nation;
  required int32 current_date (DATE);
  required int64 current_ts;
  required fixed_len_byte_array(5) height (DECIMAL(10,6));
  required group city_to_state (MAP) {
    repeated group key_value {
      required binary key (STRING);
      required binary value (STRING);
    }
  }
  required group fare {
    required double amount;
    required binary currency (STRING);
  }
  required group tip_history (LIST) {
    repeated group list {
      required group element {
        required double amount;
        required binary currency (STRING);
      }
    }
  }
  required boolean _hoodie_is_deleted;
  required double haversine_distance;
}
{code}
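The two listings differ only in the physical type carrying height's DECIMAL(10,6): int64 in the file versus a 5-byte fixed_len_byte_array in the requested schema. A minimal sketch with parquet-mr's schema builder shows that the same logical decimal annotation can sit on different primitives, which is exactly the pair ColumnIOFactory rejects in the trace below:
{code:java}
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;

public class DecimalEncodings {
  public static void main(String[] args) {
    // DECIMAL(10,6) stored on an int64, as the file schema has it.
    Type asInt64 = Types.required(PrimitiveTypeName.INT64)
        .as(LogicalTypeAnnotation.decimalType(6, 10)) // (scale, precision)
        .named("height");

    // The same logical type on a 5-byte fixed_len_byte_array, as the
    // requested schema has it (5 bytes are enough for precision 10).
    Type asFixed = Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
        .length(5)
        .as(LogicalTypeAnnotation.decimalType(6, 10))
        .named("height");

    System.out.println(asInt64); // required int64 height (DECIMAL(10,6))
    System.out.println(asFixed); // required fixed_len_byte_array(5) height (DECIMAL(10,6))
    // Same annotation, different primitive: parquet treats these as incompatible.
    System.out.println(asInt64.equals(asFixed)); // false
  }
}
{code}
Note also the message names: the file was written with a Spark-derived schema (spark_schema, where this decimal lands on int64), while the read requests an Avro-derived one (triprec, where it lands on a 5-byte fixed).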
> Fix parquet inline reading flaky test
> -------------------------------------
>
> Key: HUDI-6753
> URL: https://issues.apache.org/jira/browse/HUDI-6753
> Project: Apache Hudi
> Issue Type: Bug
> Components: reader-core
> Reporter: sivabalan narayanan
> Priority: Major
>
> We occasionally see flakiness in parquet inline reading.
>
> Ref:
> [https://dev.azure.com/apache-hudi-ci-org/785b6ef4-2f42-4a89-8f0e-5f0d7039a0cc/_apis/build/builds/19457/logs/8]
>
>
> {code:java}
> 2023-08-25T05:00:14.1359469Z 1389627 [Executor task launch worker for task 1.0 in stage 4124.0 (TID 5621)] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader [] - Got exception when reading log file
> 2023-08-25T05:00:14.1360427Z org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
> 2023-08-25T05:00:14.1361525Z at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:54) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1362403Z at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1363340Z at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1364854Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:625) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1365985Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:667) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1367473Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternalV1(AbstractHoodieLogRecordReader.java:362) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1368371Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:220) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1369127Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:201) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1369901Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:117) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1370633Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:76) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1371380Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:466) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1372312Z at org.apache.hudi.LogFileIterator$.scanLog(Iterators.scala:371) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1372915Z at org.apache.hudi.LogFileIterator.<init>(Iterators.scala:110) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1373549Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:201) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1374172Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:212) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1374809Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:217) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1375480Z at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:109) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1376156Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1376653Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1377283Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1377837Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1378323Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1378855Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1379397Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1379899Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1380446Z at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1381328Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1381902Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1382409Z at org.apache.spark.scheduler.Task.run(Task.scala:131) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1383130Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1383688Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1384226Z at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1384631Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1385010Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1385313Z at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1386139Z Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file inlinefs://tmp/junit8419288775999068556/continuous_mor/2015/03/16/.daf7ee3a-506b-464b-b78f-776c76d7e47a-0_20230825050006621.log.1_1-4089-5572/file/?start_offset=1788&length=50947
> 2023-08-25T05:00:14.1387069Z at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1387683Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1388227Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1388947Z at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1389273Z ... 34 more
> 2023-08-25T05:00:14.1389655Z Caused by: org.apache.parquet.io.ParquetDecodingException: The requested schema is not compatible with the file schema. incompatible types: required fixed_len_byte_array(5) height (DECIMAL(10,6)) != required int64 height (DECIMAL(10,6))
> 2023-08-25T05:00:14.1390406Z at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1391060Z at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:93) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1391655Z at org.apache.parquet.schema.PrimitiveType.accept(PrimitiveType.java:602) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1392271Z at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:83) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1392925Z at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1393517Z at org.apache.parquet.schema.MessageType.accept(MessageType.java:55) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1394091Z at org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162) ~[parquet-column-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1394744Z at org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1395437Z at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1396044Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1396601Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1397241Z at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1397572Z ... 34 more
> 2023-08-25T05:00:14.1413335Z 1389628 [Executor task launch worker for task 2.0 in stage 4124.0 (TID 5622)] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader [] - Got exception when reading log file
> 2023-08-25T05:00:14.1414164Z org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
> 2023-08-25T05:00:14.1417364Z at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:54) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1419044Z at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1419938Z at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1421149Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:625) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1422075Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:667) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1423012Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternalV1(AbstractHoodieLogRecordReader.java:362) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1423879Z at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:220) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1425472Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:201) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1426992Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:117) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1427800Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:76) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1428569Z at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:466) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1429241Z at org.apache.hudi.LogFileIterator$.scanLog(Iterators.scala:371) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1429848Z at org.apache.hudi.LogFileIterator.<init>(Iterators.scala:110) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1430475Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:201) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1431098Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:212) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1431731Z at org.apache.hudi.RecordMergingFileIterator.<init>(Iterators.scala:217) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1432392Z at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:109) ~[hudi-spark-common_2.12-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1432960Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1433459Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1433994Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1434520Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1435014Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1435538Z at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1436053Z at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1436846Z at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1437415Z at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1438006Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1438563Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1439081Z at org.apache.spark.scheduler.Task.run(Task.scala:131) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1439643Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1440181Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1491) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1440781Z at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) ~[spark-core_2.12-3.2.3.jar:3.2.3]
> 2023-08-25T05:00:14.1441180Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1441542Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1441862Z at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_382]
> 2023-08-25T05:00:14.1442808Z Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file inlinefs://tmp/junit8419288775999068556/continuous_mor/2015/03/17/.76b836b0-2aae-4518-9987-c879a3a9422b-0_20230825050006621.log.1_2-4089-5573/file/?start_offset=1788&length=54731
> 2023-08-25T05:00:14.1443600Z at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1444188Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1444747Z at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[parquet-hadoop-1.12.2.jar:1.12.2]
> 2023-08-25T05:00:14.1445384Z at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[hudi-common-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
> 2023-08-25T05:00:14.1445700Z ... 34 more
> 2023-08-25T05:00:14.1446104Z Caused by: org.apache.parquet.io.ParquetDecodingException: The requested schema is not compatible with the file schema. incompatible types: required fixed_len_byte_array(5) height (DECIMAL(10,6)) != required int64 height (DECIMAL(10,6)){code}