Can anyone shed some light on this issue?

Thanks
Martin

2017-07-21 18:58 GMT-07:00 Martin Peng <wei...@gmail.com>:

> Hi,
>
> I have several Spark jobs, including both batch and streaming jobs, that
> process and analyze the system logs. We are using Kafka as the pipeline
> that connects the jobs.
>
> Since upgrading to Spark 2.1.0 + Spark Kafka Streaming 010, I have found
> that some of the jobs (both batch and streaming) randomly throw the
> exceptions below (either after running for several hours, or after just
> 20 minutes). Can anyone give me some suggestions on how to find the real
> root cause? (The Google results don't look very useful...)
>
> Thanks,
> Martin
>
> 00:30:04,510 WARN  - 17/07/22 00:30:04 WARN TaskSetManager: Lost task 60.0
> in stage 1518490.0 (TID 338070, 10.133.96.21, executor 0):
> java.io.FileNotFoundException: /mnt/mesos/work_dir/slaves/
> 20160924-021501-274760970-5050-7646-S2/frameworks/40aeb8e5-e82a-4df9-b034-
> 8815a7a7564b-2543/executors/0/runs/fd15c15d-2511-4f37-a106-
> 27431f583153/blockmgr-a0e0e673-f88b-4d12-a802-
> c35643e6c6b2/33/shuffle_2090_60_0.index.b66235be-79be-4455-9759-1c7ba70f91f6
> (No such file or directory)
> 00:30:04,510 WARN  -     at java.io.FileOutputStream.open0(Native Method)
> 00:30:04,510 WARN  -     at java.io.FileOutputStream.open(
> FileOutputStream.java:270)
> 00:30:04,510 WARN  -     at java.io.FileOutputStream.<
> init>(FileOutputStream.java:213)
> 00:30:04,510 WARN  -     at java.io.FileOutputStream.<
> init>(FileOutputStream.java:162)
> 00:30:04,510 WARN  -     at org.apache.spark.shuffle.
> IndexShuffleBlockResolver.writeIndexFileAndCommit(
> IndexShuffleBlockResolver.scala:144)
> 00:30:04,510 WARN  -     at org.apache.spark.shuffle.sort.
> BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:128)
> 00:30:04,510 WARN  -     at org.apache.spark.scheduler.
> ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
> 00:30:04,510 WARN  -     at org.apache.spark.scheduler.
> ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
> 00:30:04,510 WARN  -     at org.apache.spark.scheduler.
> Task.run(Task.scala:99)
> 00:30:04,510 WARN  -     at org.apache.spark.executor.
> Executor$TaskRunner.run(Executor.scala:282)
> 00:30:04,510 WARN  -     at java.util.concurrent.
> ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 00:30:04,510 WARN  -     at java.util.concurrent.
> ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 00:30:04,510 WARN  -     at java.lang.Thread.run(Thread.java:748)
>
> 00:30:04,580 INFO  - Driver stacktrace:
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(
> DAGScheduler.scala:1435)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler$$anonfun$
> abortStage$1.apply(DAGScheduler.scala:1423)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler$$anonfun$
> abortStage$1.apply(DAGScheduler.scala:1422)
> 00:30:04,580 INFO  - scala.collection.mutable.
> ResizableArray$class.foreach(ResizableArray.scala:59)
> 00:30:04,580 INFO  - scala.collection.mutable.ArrayBuffer.foreach(
> ArrayBuffer.scala:48)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler.abortStage(
> DAGScheduler.scala:1422)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler$$anonfun$
> handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler$$anonfun$
> handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
> 00:30:04,580 INFO  - scala.Option.foreach(Option.scala:257)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler.
> handleTaskSetFailed(DAGScheduler.scala:802)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.
> DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.
> DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.
> DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
> 00:30:04,580 INFO  - org.apache.spark.util.EventLoop$$anon$1.run(
> EventLoop.scala:48)
> 00:30:04,580 INFO  - org.apache.spark.scheduler.DAGScheduler.runJob(
> DAGScheduler.scala:628)
> 00:30:04,580 INFO  - org.apache.spark.SparkContext.
> runJob(SparkContext.scala:1918)
> 00:30:04,580 INFO  - org.apache.spark.SparkContext.
> runJob(SparkContext.scala:1931)
> 00:30:04,580 INFO  - org.apache.spark.SparkContext.
> runJob(SparkContext.scala:1944)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.
> scala:1353)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:151)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:112)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD.take(RDD.scala:1326)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD$$
> anonfun$isEmpty$1.apply$mcZ$sp(RDD.scala:1461)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD$$
> anonfun$isEmpty$1.apply(RDD.scala:1461)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD$$
> anonfun$isEmpty$1.apply(RDD.scala:1461)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:151)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDDOperationScope$.withScope(
> RDDOperationScope.scala:112)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
> 00:30:04,580 INFO  - org.apache.spark.rdd.RDD.isEmpty(RDD.scala:1460)
> 00:30:04,580 INFO  - com.ericsson.mediafirst.spark.clientlogsenrichment.
> ClientLogsEnrichmentJob$.executeIteration(ClientLogsEnrichmentJob.scala:
> 133)
> 00:30:04,580 INFO  - com.ericsson.mediafirst.spark.clientlogsenrichment.
> ClientLogsEnrichmentJob$.runIteration(ClientLogsEnrichmentJob.scala:76)
> 00:30:04,581 INFO  - com.ericsson.mediafirst.spark.clientlogsenrichment.
> ClientLogsEnrichmentJob$.runBatch(ClientLogsEnrichmentJob.scala:59)
> 00:30:04,581 INFO  - com.ericsson.mediafirst.sparkutils.jobtemplates.
> BatchJob.main(BatchJob.scala:35)
> 00:30:04,581 INFO  - com.ericsson.mediafirst.spark.clientlogsenrichment.
> ClientLogsEnrichmentJob.main(ClientLogsEnrichmentJob.scala)
> 00:30:04,581 INFO  - sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> Method)
> 00:30:04,581 INFO  - sun.reflect.NativeMethodAccessorImpl.invoke(
> NativeMethodAccessorImpl.java:62)
> 00:30:04,581 INFO  - sun.reflect.DelegatingMethodAccessorImpl.invoke(
> DelegatingMethodAccessorImpl.java:43)
> 00:30:04,581 INFO  - java.lang.reflect.Method.invoke(Method.java:498)
> 00:30:04,581 INFO  - org.apache.spark.deploy.
> SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(
> SparkSubmit.scala:738)
> 00:30:04,581 INFO  - org.apache.spark.deploy.SparkSubmit$.doRunMain$1(
> SparkSubmit.scala:187)
> 00:30:04,581 INFO  - org.apache.spark.deploy.SparkSubmit$.submit(
> SparkSubmit.scala:212)
> 00:30:04,581 INFO  - org.apache.spark.deploy.
> SparkSubmit$.main(SparkSubmit.scala:126)
> 00:30:04,581 INFO  - org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.
> scala)
> 00:30:04,581 WARN  - 17/07/22 00:30:04 WARN JobProgressListener: Task
> start for unknown stage 1518491
> 00:30:04,670 WARN  - 17/07/22 00:30:04 ERROR LiveListenerBus:
> SparkListenerBus has already stopped! Dropping event
> SparkListenerBlockUpdated(BlockUpdatedInfo(BlockManagerId(0,
> 10.133.96.21, 45377, None),rdd_15721_0,StorageLevel(memory, deserialized,
> 1 replicas),12024,0))
> 00:30:04,673 WARN  - 17/07/22 00:30:04 ERROR LiveListenerBus:
> SparkListenerBus has already stopped! Dropping event
> SparkListenerBlockUpdated(BlockUpdatedInfo(BlockManagerId(0,
> 10.133.96.21, 45377, None),rdd_15721_1,StorageLevel(memory, deserialized,
> 1 replicas),13736,0))
> 00:30:04,679 WARN  - 17/07/22 00:30:04 ERROR TransportRequestHandler:
> Error while invoking RpcHandler#receive() for one-way message.
> 00:30:04,679 WARN  - org.apache.spark.SparkException: Could not find
> CoarseGrainedScheduler.
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> Dispatcher.postMessage(Dispatcher.scala:154)
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> Dispatcher.postOneWayMessage(Dispatcher.scala:134)
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> NettyRpcHandler.receive(NettyRpcEnv.scala:570)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:
> 180)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportRequestHandler.handle(TransportRequestHandler.java:109)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportChannelHandler.channelRead0(TransportChannelHandler.java:119)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportChannelHandler.channelRead0(TransportChannelHandler.java:51)
> 00:30:04,679 WARN  -     at io.netty.channel.SimpleChannelInboundHandler.
> channelRead(SimpleChannelInboundHandler.java:105)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.handler.timeout.
> IdleStateHandler.channelRead(IdleStateHandler.java:266)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.handler.codec.
> MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at org.apache.spark.network.util.
> TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.channel.DefaultChannelPipeline$
> HeadContext.channelRead(DefaultChannelPipeline.java:1294)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.DefaultChannelPipeline.
> fireChannelRead(DefaultChannelPipeline.java:911)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.AbstractNioByteChannel$
> NioByteUnsafe.read(AbstractNioByteChannel.java:131)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKey(NioEventLoop.java:652)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKeysOptimized(NioEventLoop.java:575)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKeys(NioEventLoop.java:489)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.
> NioEventLoop.run(NioEventLoop.java:451)
> 00:30:04,679 WARN  -     at io.netty.util.concurrent.
> SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
> 00:30:04,679 WARN  -     at io.netty.util.concurrent.DefaultThreadFactory$
> DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
> 00:30:04,679 WARN  -     at java.lang.Thread.run(Thread.java:748)
> 00:30:04,679 WARN  - 17/07/22 00:30:04 ERROR TransportRequestHandler:
> Error while invoking RpcHandler#receive() for one-way message.
> 00:30:04,679 WARN  - org.apache.spark.SparkException: Could not find
> CoarseGrainedScheduler.
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> Dispatcher.postMessage(Dispatcher.scala:154)
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> Dispatcher.postOneWayMessage(Dispatcher.scala:134)
> 00:30:04,679 WARN  -     at org.apache.spark.rpc.netty.
> NettyRpcHandler.receive(NettyRpcEnv.scala:570)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:
> 180)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportRequestHandler.handle(TransportRequestHandler.java:109)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportChannelHandler.channelRead0(TransportChannelHandler.java:119)
> 00:30:04,679 WARN  -     at org.apache.spark.network.server.
> TransportChannelHandler.channelRead0(TransportChannelHandler.java:51)
> 00:30:04,679 WARN  -     at io.netty.channel.SimpleChannelInboundHandler.
> channelRead(SimpleChannelInboundHandler.java:105)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.handler.timeout.
> IdleStateHandler.channelRead(IdleStateHandler.java:266)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.handler.codec.
> MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at org.apache.spark.network.util.
> TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.fireChannelRead(
> AbstractChannelHandlerContext.java:346)
> 00:30:04,679 WARN  -     at io.netty.channel.DefaultChannelPipeline$
> HeadContext.channelRead(DefaultChannelPipeline.java:1294)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:367)
> 00:30:04,679 WARN  -     at io.netty.channel.
> AbstractChannelHandlerContext.invokeChannelRead(
> AbstractChannelHandlerContext.java:353)
> 00:30:04,679 WARN  -     at io.netty.channel.DefaultChannelPipeline.
> fireChannelRead(DefaultChannelPipeline.java:911)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.AbstractNioByteChannel$
> NioByteUnsafe.read(AbstractNioByteChannel.java:131)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKey(NioEventLoop.java:652)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKeysOptimized(NioEventLoop.java:575)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.NioEventLoop.
> processSelectedKeys(NioEventLoop.java:489)
> 00:30:04,679 WARN  -     at io.netty.channel.nio.
> NioEventLoop.run(NioEventLoop.java:451)
> 00:30:04,679 WARN  -     at io.netty.util.concurrent.
> SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140)
> 00:30:04,679 WARN  -     at io.netty.util.concurrent.DefaultThreadFactory$
> DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
> 00:30:04,679 WARN  -     at java.lang.Thread.run(Thread.java:748)
> 00:30:11,318 WARN  - I0722 00:30:11.318724 2921 sched.cpp:2021] Asked to
> stop the driver
> 00:30:11,318 WARN  - I0722 00:30:11.318838 2988 sched.cpp:1203] Stopping
> framework 40aeb8e5-e82a-4df9-b034-8815a7a7564b-2543
>

Reply via email to