Xingbo Huang created FLINK-28898:
------------------------------------

Summary: ChangelogRecoverySwitchStateBackendITCase.testSwitchFromEnablingToDisablingWithRescalingOut failed
Key: FLINK-28898
URL: https://issues.apache.org/jira/browse/FLINK-28898
Project: Flink
Issue Type: Bug
Components: Runtime / Checkpointing, Runtime / State Backends
Affects Versions: 1.16.0
Reporter: Xingbo Huang
Fix For: 1.16.0

{code:java} 2022-08-10T02:48:19.5711924Z Aug 10 02:48:19 [ERROR] ChangelogRecoverySwitchStateBackendITCase.testSwitchFromEnablingToDisablingWithRescalingOut Time elapsed: 6.064 s <<< ERROR! 2022-08-10T02:48:19.5712815Z Aug 10 02:48:19 org.apache.flink.runtime.JobException: Recovery is suppressed by FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=0, backoffTimeMS=0) 2022-08-10T02:48:19.5714530Z Aug 10 02:48:19 at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) 2022-08-10T02:48:19.5716211Z Aug 10 02:48:19 at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:83) 2022-08-10T02:48:19.5717627Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure(DefaultScheduler.java:256) 2022-08-10T02:48:19.5718885Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:247) 2022-08-10T02:48:19.5720430Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed(DefaultScheduler.java:240) 2022-08-10T02:48:19.5721733Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.SchedulerBase.onTaskExecutionStateUpdate(SchedulerBase.java:738) 2022-08-10T02:48:19.5722680Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:715) 2022-08-10T02:48:19.5723612Z Aug 10 02:48:19 at org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:78) 2022-08-10T02:48:19.5724389Z Aug 10 02:48:19 at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:477) 2022-08-10T02:48:19.5725046Z Aug 10 02:48:19 at sun.reflect.GeneratedMethodAccessor20.invoke(Unknown Source) 2022-08-10T02:48:19.5725708Z Aug 10 02:48:19 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
2022-08-10T02:48:19.5726374Z Aug 10 02:48:19 at java.lang.reflect.Method.invoke(Method.java:498) 2022-08-10T02:48:19.5727065Z Aug 10 02:48:19 at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:309) 2022-08-10T02:48:19.5727932Z Aug 10 02:48:19 at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83) 2022-08-10T02:48:19.5729087Z Aug 10 02:48:19 at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:307) 2022-08-10T02:48:19.5730134Z Aug 10 02:48:19 at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:222) 2022-08-10T02:48:19.5731536Z Aug 10 02:48:19 at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84) 2022-08-10T02:48:19.5732549Z Aug 10 02:48:19 at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168) 2022-08-10T02:48:19.5735018Z Aug 10 02:48:19 at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) 2022-08-10T02:48:19.5735821Z Aug 10 02:48:19 at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) 2022-08-10T02:48:19.5736465Z Aug 10 02:48:19 at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) 2022-08-10T02:48:19.5737234Z Aug 10 02:48:19 at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) 2022-08-10T02:48:19.5737895Z Aug 10 02:48:19 at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) 2022-08-10T02:48:19.5738574Z Aug 10 02:48:19 at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) 2022-08-10T02:48:19.5739276Z Aug 10 02:48:19 at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) 2022-08-10T02:48:19.5740315Z Aug 10 02:48:19 at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) 2022-08-10T02:48:19.5741211Z Aug 10 02:48:19 at akka.actor.Actor.aroundReceive(Actor.scala:537) 2022-08-10T02:48:19.5741878Z Aug 10 02:48:19 
at akka.actor.Actor.aroundReceive$(Actor.scala:535) 2022-08-10T02:48:19.5742497Z Aug 10 02:48:19 at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) 2022-08-10T02:48:19.5743242Z Aug 10 02:48:19 at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) 2022-08-10T02:48:19.5743977Z Aug 10 02:48:19 at akka.actor.ActorCell.invoke(ActorCell.scala:548) 2022-08-10T02:48:19.5744851Z Aug 10 02:48:19 at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) 2022-08-10T02:48:19.5745438Z Aug 10 02:48:19 at akka.dispatch.Mailbox.run(Mailbox.scala:231) 2022-08-10T02:48:19.5746059Z Aug 10 02:48:19 at akka.dispatch.Mailbox.exec(Mailbox.scala:243) 2022-08-10T02:48:19.5746885Z Aug 10 02:48:19 at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) 2022-08-10T02:48:19.5747571Z Aug 10 02:48:19 at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) 2022-08-10T02:48:19.5748236Z Aug 10 02:48:19 at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) 2022-08-10T02:48:19.5749314Z Aug 10 02:48:19 at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) 2022-08-10T02:48:19.5750188Z Aug 10 02:48:19 Caused by: java.io.IOException: Could not parse external pointer as state base path 2022-08-10T02:48:19.5751178Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.createExclusiveDirPath(MetadataV2V3SerializerBase.java:875) 2022-08-10T02:48:19.5752359Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.getExclusiveDirPath(MetadataV2V3SerializerBase.java:865) 2022-08-10T02:48:19.5753888Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeStreamStateHandle(MetadataV2V3SerializerBase.java:727) 2022-08-10T02:48:19.5754918Z Aug 10 02:48:19 at 
org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeOperatorStateHandle(MetadataV2V3SerializerBase.java:630) 2022-08-10T02:48:19.5755929Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeSubtaskState(MetadataV2V3SerializerBase.java:279) 2022-08-10T02:48:19.5756887Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV3Serializer.deserializeOperatorState(MetadataV3Serializer.java:184) 2022-08-10T02:48:19.5757815Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeMetadata(MetadataV2V3SerializerBase.java:182) 2022-08-10T02:48:19.5758890Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV3Serializer.deserialize(MetadataV3Serializer.java:90) 2022-08-10T02:48:19.5759762Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV4Serializer.deserialize(MetadataV4Serializer.java:49) 2022-08-10T02:48:19.5760584Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.Checkpoints.loadCheckpointMetadata(Checkpoints.java:118) 2022-08-10T02:48:19.5761414Z Aug 10 02:48:19 at org.apache.flink.test.util.TestUtils.loadCheckpointMetadata(TestUtils.java:103) 2022-08-10T02:48:19.5762243Z Aug 10 02:48:19 at org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase.getAllStateHandleId(ChangelogRecoveryITCaseBase.java:233) 2022-08-10T02:48:19.5763361Z Aug 10 02:48:19 at org.apache.flink.test.checkpointing.ChangelogRecoverySwitchEnvTestBase$2.lambda$beforeElement$cdc83b0a$1(ChangelogRecoverySwitchEnvTestBase.java:120) 2022-08-10T02:48:19.5764755Z Aug 10 02:48:19 at org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase$ControlledSource.waitWhile(ChangelogRecoveryITCaseBase.java:359) 2022-08-10T02:48:19.5765732Z Aug 10 02:48:19 at org.apache.flink.test.checkpointing.ChangelogRecoverySwitchEnvTestBase$2.beforeElement(ChangelogRecoverySwitchEnvTestBase.java:117) 
2022-08-10T02:48:19.5766685Z Aug 10 02:48:19 at org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase$ControlledSource.run(ChangelogRecoveryITCaseBase.java:342) 2022-08-10T02:48:19.5767503Z Aug 10 02:48:19 at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:110) 2022-08-10T02:48:19.5768234Z Aug 10 02:48:19 at org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:67) 2022-08-10T02:48:19.5769041Z Aug 10 02:48:19 at org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:333) 2022-08-10T02:48:19.5771138Z Aug 10 02:48:19 Caused by: java.io.FileNotFoundException: Cannot find meta data file '_metadata' in directory 'file:/tmp/junit8987764363790519654/junit1534389645319798457/0d8e7372aaeb10c57565dae19d6f6ed6/chk-4'. Please try to load the checkpoint/savepoint directly from the metadata file instead of the directory. 2022-08-10T02:48:19.5772448Z Aug 10 02:48:19 at org.apache.flink.runtime.state.filesystem.AbstractFsCheckpointStorageAccess.resolveCheckpointPointer(AbstractFsCheckpointStorageAccess.java:290) 2022-08-10T02:48:19.5773761Z Aug 10 02:48:19 at org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.createExclusiveDirPath(MetadataV2V3SerializerBase.java:872) 2022-08-10T02:48:19.5774662Z Aug 10 02:48:19 ... 18 more {code} https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=39795&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=86f654fa-ab48-5c1a-25f4-7e7f6afb9bba -- This message was sent by Atlassian Jira (v8.20.10#820010)