Hi Claude, unfortunately, the Flink community introduced an incompatible change to ResourceSpec with FLINK-14594 [1] in the Flink 1.10.0 release. This prevents Flink from recovering JobGraphs which were created by previous versions. Hence, what you need to do is to start your Flink 1.10.0 cluster with a clean ZooKeeper namespace. Either clean up the existing ZooKeeper state or define a different ZooKeeper cluster id via `high-availability.cluster-id` in your flink-conf.yaml.
If you want to resume your previous jobs, then I would suggest that you stop every job with a savepoint and then resume them on the Flink 1.10.0 cluster by resubmitting them. Sorry for the inconvenience. [1] https://issues.apache.org/jira/browse/FLINK-14594 Cheers, Till On Thu, Jun 11, 2020 at 5:27 PM Claude Murad <claudemur...@gmail.com> wrote: > Hello, > > I upgraded Flink from 1.7 to 1.10 in Kubernetes. When the job manager is > launched, the following exception occurs. If I do some cleanup in > zookeeper and re-start, it will work. Any ideas about this error and what > needs to be done without having to do cleanup in zookeeper? > > ERROR org.apache.flink.runtime.entrypoint.ClusterEntrypoint - > Fatal error occurred in the cluster entrypoint. > java.util.concurrent.CompletionException: > org.apache.flink.util.FlinkRuntimeException: Could not recover job with job > id bfb2a937257727e080ca85933586f38b. > at > java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273) > at > java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1606) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > Caused by: org.apache.flink.util.FlinkRuntimeException: Could not recover > job with job id bfb2a937257727e080ca85933586f38b. 
> at > org.apache.flink.runtime.dispatcher.runner.SessionDispatcherLeaderProcess.recoverJob(SessionDispatcherLeaderProcess.java:149) > at > org.apache.flink.runtime.dispatcher.runner.SessionDispatcherLeaderProcess.recoverJobs(SessionDispatcherLeaderProcess.java:125) > at > org.apache.flink.runtime.dispatcher.runner.AbstractDispatcherLeaderProcess.supplyUnsynchronizedIfRunning(AbstractDispatcherLeaderProcess.java:184) > at > org.apache.flink.runtime.dispatcher.runner.SessionDispatcherLeaderProcess.recoverJobsIfRunning(SessionDispatcherLeaderProcess.java:115) > at > java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604) > ... 3 more > Caused by: org.apache.flink.util.FlinkException: Could not retrieve > submitted JobGraph from state handle under > /bfb2a937257727e080ca85933586f38b. This indicates that the retrieved state > handle is broken. Try cleaning the state handle store. > at > org.apache.flink.runtime.jobmanager.ZooKeeperJobGraphStore.recoverJobGraph(ZooKeeperJobGraphStore.java:191) > at > org.apache.flink.runtime.dispatcher.runner.SessionDispatcherLeaderProcess.recoverJob(SessionDispatcherLeaderProcess.java:146) > ... 
7 more > Caused by: java.io.InvalidClassException: > org.apache.flink.api.common.operators.ResourceSpec; incompatible types for > field cpuCores > at java.io.ObjectStreamClass.matchFields(ObjectStreamClass.java:2467) > at java.io.ObjectStreamClass.getReflector(ObjectStreamClass.java:2361) > at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:753) > at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1942) > at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1808) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2099) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2268) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2268) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2268) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:465) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:423) > at java.util.ArrayList.readObject(ArrayList.java:797) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2235) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2268) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:465) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:423) > at java.util.HashMap.readObject(HashMap.java:1412) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2235) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2268) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2344) > at 
java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2224) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2126) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1625) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:465) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:423) > at > org.apache.flink.util.InstantiationUtil.deserializeObject(InstantiationUtil.java:576) > at > org.apache.flink.util.InstantiationUtil.deserializeObject(InstantiationUtil.java:555) > at > org.apache.flink.runtime.state.RetrievableStreamStateHandle.retrieveState(RetrievableStreamStateHandle.java:58) > at > org.apache.flink.runtime.jobmanager.ZooKeeperJobGraphStore.recoverJobGraph(ZooKeeperJobGraphStore.java:185) > ... 8 more >