Thanks for the note. The root cause is the following Caused by: org.apache.flink.util.FlinkRuntimeException: Failed to start the operator coordinators at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:169) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startAllOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:82) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:624) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:1010) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:927) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:388) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) ~[?:?] at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) ~[?:?] 
at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) ~[flink-scala_2.12-1.15.0.jar:1.15.0] at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) ~[flink-scala_2.12-1.15.0.jar:1.15.0] at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) ~[?:?] at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) ~[flink-scala_2.12-1.15.0.jar:1.15.0] ... 13 more Caused by: java.lang.RuntimeException: java.net.URISyntaxException: Relative path in absolute URI: file:~/usr/bin/hudi/tables/t1/.hoodie at org.apache.hudi.common.fs.HoodieWrapperFileSystem.convertPathWithScheme(HoodieWrapperFileSystem.java:156) ~[?:?] at org.apache.hudi.common.fs.HoodieWrapperFileSystem.convertToDefaultPath(HoodieWrapperFileSystem.java:961) ~[?:?] at org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$getFileStatus$17(HoodieWrapperFileSystem.java:398) ~[?:?] at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106) ~[?:?] at org.apache.hudi.common.fs.HoodieWrapperFileSystem.getFileStatus(HoodieWrapperFileSystem.java:396) ~[?:?] at org.apache.hudi.exception.TableNotFoundException.checkTableValidity(TableNotFoundException.java:51) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:128) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient.newMetaClient(HoodieTableMetaClient.java:642) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient.access$000(HoodieTableMetaClient.java:80) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:711) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient.initTableAndGetMetaClient(HoodieTableMetaClient.java:466) ~[?:?] at org.apache.hudi.common.table.HoodieTableMetaClient$PropertyBuilder.initTable(HoodieTableMetaClient.java:1122) ~[?:?] at org.apache.hudi.util.StreamerUtil.initTableIfNotExists(StreamerUtil.java:323) ~[?:?] 
at org.apache.hudi.util.StreamerUtil.initTableIfNotExists(StreamerUtil.java:293) ~[?:?] at org.apache.hudi.sink.StreamWriteOperatorCoordinator.start(StreamWriteOperatorCoordinator.java:179) ~[?:?] at org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder.start(OperatorCoordinatorHolder.java:194) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:164) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startAllOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:82) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:624) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:1010) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:927) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:388) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) ~[flink-dist-1.15.0.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0] at 
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) ~[?:?] at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) ~[?:?] at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) ~[flink-scala_2.12-1.15.0.jar:1.15.0] at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) ~[flink-scala_2.12-1.15.0.jar:1.15.0] at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) ~[?:?] at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
I’m not sure whether it’s appropriate to shut down the cluster just because a job uses a wrong configuration (a relative path was set). > 2022年10月14日 19:53,Matthias Pohl via user <user@flink.apache.org> 写道: > > Hi Jie Han, > welcome to the community. Just a little side note: These kinds of questions > are more suitable to be asked in the user mailing list. The dev mailing list > is rather used for discussing feature development or project-related topics. > See [1] for further details. > > About your question: The stacktrace you're providing indicates that something > went wrong while initiating the job execution. Unfortunately, the actual > reason is not clear because that's not included in your stacktrace (it should > be listed as a cause for the JobMasterException in your logs). You're right > in assuming that Flink is able to handle certain kinds of user code and > infrastructure-related errors by restarting the job. But there might be other > Flink cluster internal errors that could cause a Flink cluster shutdown. It's > hard to tell from the logs you provided. Usually, it's a good habit to share > a reasonable amount of logs to make investigating the issue easier right away. > > Let's move the discussion into the user mailing list in case you have further > questions. > > Best, > Matthias > > [1] https://flink.apache.org/community.html#mailing-lists > <https://flink.apache.org/community.html#mailing-lists> > On Fri, Oct 14, 2022 at 10:13 AM Jie Han <tunyu...@gmail.com > <mailto:tunyu...@gmail.com>> wrote: > Hi, guys, I’m new to Apache Flink. It’s exciting to join the community! > > When I tried out Flink 1.15.0, I ran into a confusing problem; here is the > streamlined log: > > org.apache.flink.runtime.rpc.akka.exceptions.AkkaRpcException: Could not > start RpcEndpoint jobmanager_2. 
> at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:617) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.actor.Actor.aroundReceive(Actor.scala:537) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.actor.Actor.aroundReceive$(Actor.scala:535) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.actor.ActorCell.invoke(ActorCell.scala:548) > 
~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) > [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.dispatch.Mailbox.run(Mailbox.scala:231) > [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at akka.dispatch.Mailbox.exec(Mailbox.scala:243) > [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) > [?:1.8.0_301] > at > java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1067) > [?:1.8.0_301] > at > java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1703) > [?:1.8.0_301] > at > java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:172) > [?:1.8.0_301] > Caused by: org.apache.flink.runtime.jobmaster.JobMasterException: Could not > start the JobMaster. > at > org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:390) > ~[flink-dist-1.15.0.jar:1.15.0] > at > org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) > ~[flink-dist-1.15.0.jar:1.15.0] > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at > org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) > ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0] > ... 20 more > … > > 2022-10-14 15:13:30,493 INFO > org.apache.flink.runtime.entrypoint.ClusterEntrypoint [] - Shutting > StandaloneSessionClusterEntrypoint down with application status UNKNOWN. > Diagnostics Cluster entrypoint has been closed externally.. 
> > As recorded in the log, the standalone session cluster was shut down by the > jobmaster exception. I thought any job’s exception should not shut down the > cluster. > So, is this action expected?