[ https://issues.apache.org/jira/browse/SPARK-51512?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kent Yao resolved SPARK-51512. ------------------------------ Fix Version/s: 4.0.0 Resolution: Fixed Issue resolved by pull request 50277 [https://github.com/apache/spark/pull/50277] > Filter out null MapStatus when cleaning up shuffle data with > ExternalShuffleService > ----------------------------------------------------------------------------------- > > Key: SPARK-51512 > URL: https://issues.apache.org/jira/browse/SPARK-51512 > Project: Spark > Issue Type: Task > Components: Spark Core > Affects Versions: 4.0.0 > Reporter: Wan Kun > Assignee: Wan Kun > Priority: Minor > Labels: pull-request-available > Fix For: 4.0.0 > > > When the application crashes unexpectedly, the registered map statuses may > contain null values, therefore we should skip these null values when cleaning > up shuffle data with ExternalShuffleService. > {code:java} > 25/03/14 15:41:35 ERROR ContextCleaner: Error cleaning shuffle 4 > org.apache.spark.SparkException: Exception thrown in awaitResult: > at > org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:53) > at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:342) > at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75) > at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:101) > at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:85) > at > org.apache.spark.storage.BlockManagerMaster.removeShuffle(BlockManagerMaster.scala:204) > at > org.apache.spark.shuffle.sort.io.LocalDiskShuffleDriverComponents.removeShuffle(LocalDiskShuffleDriverComponents.java:47) > at > org.apache.spark.ContextCleaner.doCleanupShuffle(ContextCleaner.scala:241) > at > org.apache.spark.ContextCleaner.$anonfun$keepCleaning$3(ContextCleaner.scala:203) > at > org.apache.spark.ContextCleaner.$anonfun$keepCleaning$3$adapted(ContextCleaner.scala:196) > at scala.Option.foreach(Option.scala:437) > at > org.apache.spark.ContextCleaner.$anonfun$keepCleaning$1(ContextCleaner.scala:196) > at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1383) > at > org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:190) > at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:80) > Caused by: java.lang.NullPointerException: Cannot invoke > "org.apache.spark.scheduler.MapStatus.location()" because "mapStatus" is null > at > org.apache.spark.storage.BlockManagerMasterEndpoint.$anonfun$removeShuffle$3(BlockManagerMasterEndpoint.scala:423) > at scala.collection.ArrayOps$.foreach$extension(ArrayOps.scala:1323) > at > org.apache.spark.storage.BlockManagerMasterEndpoint.$anonfun$removeShuffle$2(BlockManagerMasterEndpoint.scala:421) > at > org.apache.spark.storage.BlockManagerMasterEndpoint.$anonfun$removeShuffle$2$adapted(BlockManagerMasterEndpoint.scala:420) > at > org.apache.spark.ShuffleStatus.$anonfun$withMapStatuses$1(MapOutputTracker.scala:435) > at > org.apache.spark.ShuffleStatus.withReadLock(MapOutputTracker.scala:72) > at > org.apache.spark.ShuffleStatus.withMapStatuses(MapOutputTracker.scala:435) > at > org.apache.spark.storage.BlockManagerMasterEndpoint.$anonfun$removeShuffle$1(BlockManagerMasterEndpoint.scala:420) > at > org.apache.spark.storage.BlockManagerMasterEndpoint.$anonfun$removeShuffle$1$adapted(BlockManagerMasterEndpoint.scala:419) > at scala.Option.foreach(Option.scala:437) > at > org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$removeShuffle(BlockManagerMasterEndpoint.scala:419) > at > org.apache.spark.storage.BlockManagerMasterEndpoint$$anonfun$receiveAndReply$1.applyOrElse(BlockManagerMasterEndpoint.scala:201) > at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:104) > at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:216) > at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:101) > at > org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:76) > at > org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:42) > at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) > at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) > at java.base/java.lang.Thread.run(Thread.java:1583) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org