What kind of application are you running? Can you look in the worker logs and see what is going on?
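Judging from the stack trace, it looks like a Spark Streaming job with a receiver-based socket input, run in yarn-client mode; the assertion fires in the driver's ReceivedBlockTracker while it cleans up old batch metadata. Is it roughly something like the sketch below? This is only a guess reconstructed from the `foreachRDD`/`start` frames at SocketStreamingTest.scala:107/110 in your log; the host, port, and batch interval are placeholders, not your actual values.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Minimal sketch of the kind of job the stack trace suggests.
// Host, port, and batch interval are placeholders.
object SocketStreamingTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("SocketStreamingTest")
    val ssc = new StreamingContext(conf, Seconds(10))

    // A receiver-based input stream; the blocks it produces are the
    // metadata that ReceivedBlockTracker.cleanupOldBatches is dropping
    // when the assertion fails.
    val lines = ssc.socketTextStream("localhost", 9999)

    lines.foreachRDD { rdd =>
      // per-batch processing would go here
      println(s"records in batch: ${rdd.count()}")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

If it is something like that, the receiver/executor logs are the next thing to check. Since this ran on YARN, `yarn logs -applicationId <appId>` should retrieve them after the application exits, assuming log aggregation is enabled on your cluster.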
Thanks
Best Regards

On Mon, May 11, 2015 at 11:26 AM, 董帅阳 <917361...@qq.com> wrote:

> *This happens when I run my Spark application for a long time:*
>
> 15/05/11 12:50:47 INFO spark.SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:839
> 15/05/11 12:50:47 INFO storage.BlockManagerInfo: Removed input-0-1431345102800 on dn2.vi:45688 in memory (size: 27.6 KB, free: 524.2 MB)
> 15/05/11 12:50:47 ERROR LocalActorRefProvider(akka://sparkDriver): guardian failed, shutting down system
> java.lang.AssertionError: assertion failed
>         at scala.Predef$.assert(Predef.scala:165)
>         at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.cleanupOldBatches(ReceivedBlockTracker.scala:153)
>         at org.apache.spark.streaming.scheduler.ReceiverTracker.cleanupOldBlocksAndBatches(ReceiverTracker.scala:127)
>         at org.apache.spark.streaming.scheduler.JobGenerator.clearMetadata(JobGenerator.scala:264)
>         at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:175)
>         at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$start$1$$anon$1$$anonfun$receive$1.applyOrElse(JobGenerator.scala:85)
>         at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
>         at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$start$1$$anon$1.aroundReceive(JobGenerator.scala:83)
>         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
>         at akka.actor.ActorCell.invoke(ActorCell.scala:487)
>         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
>         at akka.dispatch.Mailbox.run(Mailbox.scala:220)
>         at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
>         at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>         at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>         at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>         at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 15/05/11 12:50:47 ERROR actor.OneForOneStrategy: assertion failed
> java.lang.AssertionError: assertion failed
>         at scala.Predef$.assert(Predef.scala:165)
>         at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.cleanupOldBatches(ReceivedBlockTracker.scala:153)
>         at org.apache.spark.streaming.scheduler.ReceiverTracker.cleanupOldBlocksAndBatches(ReceiverTracker.scala:127)
>         at org.apache.spark.streaming.scheduler.JobGenerator.clearMetadata(JobGenerator.scala:264)
>         at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:175)
>         at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$start$1$$anon$1$$anonfun$receive$1.applyOrElse(JobGenerator.scala:85)
>         at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
>         at org.apache.spark.streaming.scheduler.JobGenerator$$anonfun$start$1$$anon$1.aroundReceive(JobGenerator.scala:83)
>         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
>         at akka.actor.ActorCell.invoke(ActorCell.scala:487)
>         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
>         at akka.dispatch.Mailbox.run(Mailbox.scala:220)
>         at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
>         at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>         at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>         at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>         at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 15/05/11 12:50:47 WARN util.AkkaUtils: Error sending message [message = GetLocationsMultipleBlockIds([Lorg.apache.spark.storage.BlockId;@7e6a12b8)] in 1 attempts
> akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/BlockManagerMaster#-1781979320]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         at org.apache.spark.storage.BlockManagerMaster.askDriverWithReply(BlockManagerMaster.scala:221)
>         at org.apache.spark.storage.BlockManagerMaster.getLocations(BlockManagerMaster.scala:75)
>         at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$getLocationBlockIds(BlockManager.scala:421)
>         at org.apache.spark.storage.BlockManager$.blockIdsToBlockManagers(BlockManager.scala:1256)
>         at org.apache.spark.storage.BlockManager$.blockIdsToHosts(BlockManager.scala:1279)
>         at org.apache.spark.rdd.BlockRDD.locations_$lzycompute(BlockRDD.scala:34)
>         at org.apache.spark.rdd.BlockRDD.locations_(BlockRDD.scala:34)
>         at org.apache.spark.rdd.BlockRDD.getPreferredLocations(BlockRDD.scala:57)
>         at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:231)
>         at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:231)
>         at scala.Option.getOrElse(Option.scala:120)
>         at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:230)
>         at org.apache.spark.rdd.UnionPartition.preferredLocations(UnionRDD.scala:47)
>         at org.apache.spark.rdd.UnionRDD.getPreferredLocations(UnionRDD.scala:91)
>         at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:231)
>         at org.apache.spark.rdd.RDD$$anonfun$preferredLocations$2.apply(RDD.scala:231)
>         at scala.Option.getOrElse(Option.scala:120)
>         at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:230)
>         at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1324)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply$mcVI$sp(DAGScheduler.scala:1334)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1333)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2$$anonfun$apply$2.apply(DAGScheduler.scala:1333)
>         at scala.collection.immutable.List.foreach(List.scala:318)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1333)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal$2.apply(DAGScheduler.scala:1331)
>         at scala.collection.immutable.List.foreach(List.scala:318)
>         at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal(DAGScheduler.scala:1331)
>         at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:1296)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$14.apply(DAGScheduler.scala:863)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$14.apply(DAGScheduler.scala:860)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>         at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>         at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>         at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
>         at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
>         at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>         at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>         at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:860)
>         at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:778)
>         at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:762)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> 15/05/11 12:50:47 INFO remote.RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.
> 15/05/11 12:50:47 INFO remote.RemoteActorRefProvider$RemotingTerminator: Remote daemon shut down; proceeding with flushing remote transports.
> 15/05/11 12:50:47 INFO remote.RemoteActorRefProvider$RemotingTerminator: Remoting shut down.
> 15/05/11 12:50:47 ERROR cluster.YarnClientSchedulerBackend: Yarn application has already exited with state FINISHED!
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/streaming/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/streaming,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/metrics/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/stage/kill,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/static,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/executors/threadDump/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/executors/threadDump,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/executors/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/executors,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/environment/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/environment,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/storage/rdd/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/storage/rdd,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/storage/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/storage,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/pool/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/pool,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/stage/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/stage,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/stages,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/jobs/job/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/jobs/job,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/jobs/json,null}
> 15/05/11 12:50:47 INFO handler.ContextHandler: stopped o.s.j.s.ServletContextHandler{/jobs,null}
> 15/05/11 12:50:47 INFO ui.SparkUI: Stopped Spark web UI at http://master.vi:4041
> 15/05/11 12:50:47 INFO scheduler.DAGScheduler: Stopping DAGScheduler
> 15/05/11 12:50:47 INFO scheduler.DAGScheduler: Job 3 failed: start at SocketStreamingTest.scala:110, took 387.065955 s
> Exception in thread "Thread-40" org.apache.spark.SparkException: Job cancelled because SparkContext was shut down
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:699)
>         at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:698)
>         at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
>         at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:698)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:1411)
>         at org.apache.spark.util.EventLoop.stop(EventLoop.scala:81)
>         at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:1346)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1380)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> 15/05/11 12:50:47 INFO scheduler.DAGScheduler: Job 53 failed: foreachRDD at SocketStreamingTest.scala:107, took 0.553739 s
> 15/05/11 12:50:47 INFO cluster.YarnClientSchedulerBackend: Shutting down all executors
> 15/05/11 12:50:47 INFO cluster.YarnClientSchedulerBackend: Stopped
> 15/05/11 12:50:47 WARN util.AkkaUtils: Error sending message [message = StopMapOutputTracker] in 1 attempts
> akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/MapOutputTracker#-890977520]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:113)
>         at org.apache.spark.MapOutputTracker.sendTracker(MapOutputTracker.scala:123)
>         at org.apache.spark.MapOutputTrackerMaster.stop(MapOutputTracker.scala:329)
>         at org.apache.spark.SparkEnv.stop(SparkEnv.scala:85)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1385)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> 15/05/11 12:50:50 WARN util.AkkaUtils: Error sending message [message = StopMapOutputTracker] in 2 attempts
> akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/MapOutputTracker#-890977520]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:113)
>         at org.apache.spark.MapOutputTracker.sendTracker(MapOutputTracker.scala:123)
>         at org.apache.spark.MapOutputTrackerMaster.stop(MapOutputTracker.scala:329)
>         at org.apache.spark.SparkEnv.stop(SparkEnv.scala:85)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1385)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> 15/05/11 12:50:53 WARN util.AkkaUtils: Error sending message [message = StopMapOutputTracker] in 3 attempts
> akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/MapOutputTracker#-890977520]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:113)
>         at org.apache.spark.MapOutputTracker.sendTracker(MapOutputTracker.scala:123)
>         at org.apache.spark.MapOutputTrackerMaster.stop(MapOutputTracker.scala:329)
>         at org.apache.spark.SparkEnv.stop(SparkEnv.scala:85)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1385)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> 15/05/11 12:50:56 ERROR spark.MapOutputTrackerMaster: Error communicating with MapOutputTracker
> org.apache.spark.SparkException: Error sending message [message = StopMapOutputTracker]
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:209)
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:113)
>         at org.apache.spark.MapOutputTracker.sendTracker(MapOutputTracker.scala:123)
>         at org.apache.spark.MapOutputTrackerMaster.stop(MapOutputTracker.scala:329)
>         at org.apache.spark.SparkEnv.stop(SparkEnv.scala:85)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1385)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> Caused by: akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/MapOutputTracker#-890977520]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         ... 6 more
> Exception in thread "Yarn application state monitor" org.apache.spark.SparkException: Error communicating with MapOutputTracker
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:117)
>         at org.apache.spark.MapOutputTracker.sendTracker(MapOutputTracker.scala:123)
>         at org.apache.spark.MapOutputTrackerMaster.stop(MapOutputTracker.scala:329)
>         at org.apache.spark.SparkEnv.stop(SparkEnv.scala:85)
>         at org.apache.spark.SparkContext.stop(SparkContext.scala:1385)
>         at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$$anon$1.run(YarnClientSchedulerBackend.scala:143)
> Caused by: org.apache.spark.SparkException: Error sending message [message = StopMapOutputTracker]
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:209)
>         at org.apache.spark.MapOutputTracker.askTracker(MapOutputTracker.scala:113)
>         ... 5 more
> Caused by: akka.pattern.AskTimeoutException: Recipient[Actor[akka://sparkDriver/user/MapOutputTracker#-890977520]] had already been terminated.
>         at akka.pattern.AskableActorRef$.ask$extension(AskSupport.scala:132)
>         at org.apache.spark.util.AkkaUtils$.askWithReply(AkkaUtils.scala:194)
>         ... 6 more