Hi Si-li, I don't think this is the root cause: the InterruptedException usually just means the task was cancelled after a failure somewhere else. You should check whether there are other exceptions in the JM and TM logs.
Best, Hang Shammon FY <zjur...@gmail.com> 于2023年4月18日周二 09:02写道: > Hi Si-li > > Could you give some more detailed exceptions? Or you can check the metrics > of your job such as memory usage. > > Best, > Shammon FY > > > On Fri, Apr 14, 2023 at 5:14 PM Si-li Liu <unix...@gmail.com> wrote: > >> My job read data from mysql and write to doris. It will crash after 20 >> mins ~ 1 hour after start. >> >> org.apache.flink.runtime.JobException: Recovery is suppressed by >> FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=10, >> backoffTimeMS=10000) >> at org.apache.flink.runtime.executiongraph.failover.flip1. >> ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) >> at org.apache.flink.runtime.executiongraph.failover.flip1. >> ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler >> .java:83) >> at org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure( >> DefaultScheduler.java:256) >> at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure( >> DefaultScheduler.java:247) >> at org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed( >> DefaultScheduler.java:240) >> at org.apache.flink.runtime.scheduler.SchedulerBase >> .onTaskExecutionStateUpdate(SchedulerBase.java:738) >> at org.apache.flink.runtime.scheduler.SchedulerBase >> .updateTaskExecutionState(SchedulerBase.java:715) >> at org.apache.flink.runtime.scheduler.SchedulerNG >> .updateTaskExecutionState(SchedulerNG.java:78) >> at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState( >> JobMaster.java:477) >> at sun.reflect.GeneratedMethodAccessor16.invoke(Unknown Source) >> at sun.reflect.DelegatingMethodAccessorImpl.invoke( >> DelegatingMethodAccessorImpl.java:43) >> at java.lang.reflect.Method.invoke(Method.java:498) >> at org.apache.flink.runtime.rpc.akka.AkkaRpcActor >> .lambda$handleRpcInvocation$1(AkkaRpcActor.java:309) >> at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils >> 
.runWithContextClassLoader(ClassLoadingUtils.java:83) >> at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation( >> AkkaRpcActor.java:307) >> at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage( >> AkkaRpcActor.java:222) >> at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage( >> FencedAkkaRpcActor.java:84) >> at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage( >> AkkaRpcActor.java:168) >> at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) >> at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) >> at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) >> at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) >> at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) >> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) >> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) >> at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) >> at akka.actor.Actor.aroundReceive(Actor.scala:537) >> at akka.actor.Actor.aroundReceive$(Actor.scala:535) >> at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) >> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) >> at akka.actor.ActorCell.invoke(ActorCell.scala:548) >> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) >> at akka.dispatch.Mailbox.run(Mailbox.scala:231) >> at akka.dispatch.Mailbox.exec(Mailbox.scala:243) >> at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) >> at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java: >> 1056) >> at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) >> at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread >> .java:175) >> Caused by: java.lang.InterruptedException >> at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject >> .reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014) >> at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject >> .await(AbstractQueuedSynchronizer.java:2173) >> at org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl.take( >> TaskMailboxImpl.java:149) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:363) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .processMail(MailboxProcessor.java:352) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .runMailboxLoop(MailboxProcessor.java:229) >> at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop( >> StreamTask.java:831) >> at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask >> .java:780) >> at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring( >> Task.java:935) >> at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java: >> 914) >> at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) >> at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) >> at java.lang.Thread.run(Thread.java:748) >> >> java.lang.InterruptedException >> at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject >> .reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014) >> at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject >> .await(AbstractQueuedSynchronizer.java:2173) >> at org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl.take( >> TaskMailboxImpl.java:149) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:363) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .processMail(MailboxProcessor.java:352) >> at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor >> .runMailboxLoop(MailboxProcessor.java:229) >> at 
org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop( >> StreamTask.java:831) >> at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask >> .java:780) >> at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring( >> Task.java:935) >> at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java: >> 914) >> at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) >> at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) >> at java.lang.Thread.run(Thread.java:748) >> >> -- >> Best regards >> >> Sili Liu >> >