[ 
https://issues.apache.org/jira/browse/HUDI-3684?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alexey Kudinkin updated HUDI-3684:
----------------------------------
    Epic Link: HUDI-1822

> NPE in ParquetUtils
> -------------------
>
>                 Key: HUDI-3684
>                 URL: https://issues.apache.org/jira/browse/HUDI-3684
>             Project: Apache Hudi
>          Issue Type: Task
>            Reporter: Alexey Kudinkin
>            Assignee: Alexey Kudinkin
>            Priority: Blocker
>             Fix For: 0.11.0
>
>
> ParquetUtils is failing w/ NPE whenever returned min/max statistic from 
> Parquet is null
> {code:java}
> Caused by: java.lang.NullPointerException
>   at 
> org.apache.hudi.common.util.ParquetUtils.convertToNativeJavaType(ParquetUtils.java:390)
>   at 
> org.apache.hudi.common.util.ParquetUtils.lambda$null$2(ParquetUtils.java:305)
>   at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
>   at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:175)
>   at 
> java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
>   at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>   at 
> java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>   at 
> java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150)
>   at 
> java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173)
>   at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>   at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485)
>   at java.util.stream.ReferencePipeline$7$1.accept(ReferencePipeline.java:272)
>   at 
> java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
>   at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
>   at 
> java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
>   at 
> java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
>   at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
>   at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566)
>   at 
> org.apache.hudi.common.util.ParquetUtils.readRangeFromParquetMetadata(ParquetUtils.java:315)
>   at 
> org.apache.hudi.metadata.HoodieTableMetadataUtil.getColumnStats(HoodieTableMetadataUtil.java:958)
>   at 
> org.apache.hudi.metadata.HoodieTableMetadataUtil.translateWriteStatToColumnStats(HoodieTableMetadataUtil.java:942)
>   at 
> org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$convertMetadataToColumnStatsRecords$6fd51462$1(HoodieTableMetadataUtil.java:895)
>   at 
> org.apache.hudi.data.HoodieJavaRDD.lambda$flatMap$a6598fcb$1(HoodieJavaRDD.java:117)
>   at 
> org.apache.spark.api.java.JavaRDDLike.$anonfun$flatMap$1(JavaRDDLike.scala:125)
>   at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
>   at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
>   at 
> org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
>   at 
> org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
>   at 
> org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1498)
>   at 
> org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1408)
>   at 
> org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1472)
>   at 
> org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1295)
>   at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
>   at 
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
>   at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
>   at 
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
>   at org.apache.spark.scheduler.Task.run(Task.scala:131)
>   at 
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
>   at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>   at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>   at java.lang.Thread.run(Thread.java:748) {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to