Hi Si-li, could you share more details about the exceptions? You can also check your job's metrics, such as memory usage.
Best, Shammon FY On Fri, Apr 14, 2023 at 5:14 PM Si-li Liu <unix...@gmail.com> wrote: > My job read data from mysql and write to doris. It will crash after 20 > mins ~ 1 hour after start. > > org.apache.flink.runtime.JobException: Recovery is suppressed by > FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=10, > backoffTimeMS=10000) > at org.apache.flink.runtime.executiongraph.failover.flip1. > ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) > at org.apache.flink.runtime.executiongraph.failover.flip1. > ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler > .java:83) > at org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure( > DefaultScheduler.java:256) > at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure( > DefaultScheduler.java:247) > at org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed( > DefaultScheduler.java:240) > at org.apache.flink.runtime.scheduler.SchedulerBase > .onTaskExecutionStateUpdate(SchedulerBase.java:738) > at org.apache.flink.runtime.scheduler.SchedulerBase > .updateTaskExecutionState(SchedulerBase.java:715) > at org.apache.flink.runtime.scheduler.SchedulerNG > .updateTaskExecutionState(SchedulerNG.java:78) > at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState( > JobMaster.java:477) > at sun.reflect.GeneratedMethodAccessor16.invoke(Unknown Source) > at sun.reflect.DelegatingMethodAccessorImpl.invoke( > DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.flink.runtime.rpc.akka.AkkaRpcActor > .lambda$handleRpcInvocation$1(AkkaRpcActor.java:309) > at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils > .runWithContextClassLoader(ClassLoadingUtils.java:83) > at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation( > AkkaRpcActor.java:307) > at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage( > AkkaRpcActor.java:222) > at 
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage( > FencedAkkaRpcActor.java:84) > at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage( > AkkaRpcActor.java:168) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) > at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > at akka.actor.Actor.aroundReceive(Actor.scala:537) > at akka.actor.Actor.aroundReceive$(Actor.scala:535) > at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) > at akka.actor.ActorCell.invoke(ActorCell.scala:548) > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) > at akka.dispatch.Mailbox.run(Mailbox.scala:231) > at akka.dispatch.Mailbox.exec(Mailbox.scala:243) > at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) > at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java: > 1056) > at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) > at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread > .java:175) > Caused by: java.lang.InterruptedException > at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject > .reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014) > at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject > .await(AbstractQueuedSynchronizer.java:2173) > at org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl.take( > TaskMailboxImpl.java:149) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor 
> .processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:363) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor > .processMail(MailboxProcessor.java:352) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor > .runMailboxLoop(MailboxProcessor.java:229) > at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop( > StreamTask.java:831) > at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask > .java:780) > at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring( > Task.java:935) > at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java: > 914) > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) > at java.lang.Thread.run(Thread.java:748) > > java.lang.InterruptedException > at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject > .reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014) > at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject > .await(AbstractQueuedSynchronizer.java:2173) > at org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl.take( > TaskMailboxImpl.java:149) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor > .processMailsWhenDefaultActionUnavailable(MailboxProcessor.java:363) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor > .processMail(MailboxProcessor.java:352) > at org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor > .runMailboxLoop(MailboxProcessor.java:229) > at org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop( > StreamTask.java:831) > at org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask > .java:780) > at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring( > Task.java:935) > at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java: > 914) > at 
org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:728) > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:550) > at java.lang.Thread.run(Thread.java:748) > > -- > Best regards > > Sili Liu >