[ https://issues.apache.org/jira/browse/FLINK-28898?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xingbo Huang updated FLINK-28898: --------------------------------- Priority: Major (was: Critical) > ChangelogRecoverySwitchStateBackendITCase.testSwitchFromEnablingToDisablingWithRescalingOut > failed > -------------------------------------------------------------------------------------------------- > > Key: FLINK-28898 > URL: https://issues.apache.org/jira/browse/FLINK-28898 > Project: Flink > Issue Type: Bug > Components: Runtime / Checkpointing, Runtime / State Backends > Affects Versions: 1.16.0 > Reporter: Xingbo Huang > Assignee: Hangxiang Yu > Priority: Major > Labels: pull-request-available, test-stability > Fix For: 1.16.0 > > > {code:java} > 2022-08-10T02:48:19.5711924Z Aug 10 02:48:19 [ERROR] > ChangelogRecoverySwitchStateBackendITCase.testSwitchFromEnablingToDisablingWithRescalingOut > Time elapsed: 6.064 s <<< ERROR! > 2022-08-10T02:48:19.5712815Z Aug 10 02:48:19 > org.apache.flink.runtime.JobException: Recovery is suppressed by > FixedDelayRestartBackoffTimeStrategy(maxNumberRestartAttempts=0, > backoffTimeMS=0) > 2022-08-10T02:48:19.5714530Z Aug 10 02:48:19 at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:139) > 2022-08-10T02:48:19.5716211Z Aug 10 02:48:19 at > org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:83) > 2022-08-10T02:48:19.5717627Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.DefaultScheduler.recordTaskFailure(DefaultScheduler.java:256) > 2022-08-10T02:48:19.5718885Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:247) > 2022-08-10T02:48:19.5720430Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.DefaultScheduler.onTaskFailed(DefaultScheduler.java:240) > 2022-08-10T02:48:19.5721733Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.SchedulerBase.onTaskExecutionStateUpdate(SchedulerBase.java:738) > 2022-08-10T02:48:19.5722680Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:715) > 2022-08-10T02:48:19.5723612Z Aug 10 02:48:19 at > org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:78) > 2022-08-10T02:48:19.5724389Z Aug 10 02:48:19 at > org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:477) > 2022-08-10T02:48:19.5725046Z Aug 10 02:48:19 at > sun.reflect.GeneratedMethodAccessor20.invoke(Unknown Source) > 2022-08-10T02:48:19.5725708Z Aug 10 02:48:19 at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > 2022-08-10T02:48:19.5726374Z Aug 10 02:48:19 at > java.lang.reflect.Method.invoke(Method.java:498) > 2022-08-10T02:48:19.5727065Z Aug 10 02:48:19 at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:309) > 2022-08-10T02:48:19.5727932Z Aug 10 02:48:19 at > org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83) > 2022-08-10T02:48:19.5729087Z Aug 10 02:48:19 at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:307) > 2022-08-10T02:48:19.5730134Z Aug 10 02:48:19 at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:222) > 2022-08-10T02:48:19.5731536Z Aug 10 02:48:19 at > org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:84) > 2022-08-10T02:48:19.5732549Z Aug 10 02:48:19 at > org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:168) > 2022-08-10T02:48:19.5735018Z Aug 10 02:48:19 at > akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) > 2022-08-10T02:48:19.5735821Z Aug 10 02:48:19 at > akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) > 2022-08-10T02:48:19.5736465Z Aug 10 02:48:19 at > scala.PartialFunction.applyOrElse(PartialFunction.scala:123) > 2022-08-10T02:48:19.5737234Z Aug 10 02:48:19 at > scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) > 2022-08-10T02:48:19.5737895Z Aug 10 02:48:19 at > akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) > 2022-08-10T02:48:19.5738574Z Aug 10 02:48:19 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) > 2022-08-10T02:48:19.5739276Z Aug 10 02:48:19 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > 2022-08-10T02:48:19.5740315Z Aug 10 02:48:19 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) > 2022-08-10T02:48:19.5741211Z Aug 10 02:48:19 at > akka.actor.Actor.aroundReceive(Actor.scala:537) > 2022-08-10T02:48:19.5741878Z Aug 10 02:48:19 at > akka.actor.Actor.aroundReceive$(Actor.scala:535) > 2022-08-10T02:48:19.5742497Z Aug 10 02:48:19 at > akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) > 2022-08-10T02:48:19.5743242Z Aug 10 02:48:19 at > akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) > 2022-08-10T02:48:19.5743977Z Aug 10 02:48:19 at > akka.actor.ActorCell.invoke(ActorCell.scala:548) > 2022-08-10T02:48:19.5744851Z Aug 10 02:48:19 at > akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) > 2022-08-10T02:48:19.5745438Z Aug 10 02:48:19 at > akka.dispatch.Mailbox.run(Mailbox.scala:231) > 2022-08-10T02:48:19.5746059Z Aug 10 02:48:19 at > akka.dispatch.Mailbox.exec(Mailbox.scala:243) > 2022-08-10T02:48:19.5746885Z Aug 10 02:48:19 at > java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) > 2022-08-10T02:48:19.5747571Z Aug 10 02:48:19 at > java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) > 2022-08-10T02:48:19.5748236Z Aug 10 02:48:19 at > java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) > 2022-08-10T02:48:19.5749314Z Aug 10 02:48:19 at > java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) > 2022-08-10T02:48:19.5750188Z Aug 10 02:48:19 Caused by: java.io.IOException: > Could not parse external pointer as state base path > 2022-08-10T02:48:19.5751178Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.createExclusiveDirPath(MetadataV2V3SerializerBase.java:875) > 2022-08-10T02:48:19.5752359Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.getExclusiveDirPath(MetadataV2V3SerializerBase.java:865) > 2022-08-10T02:48:19.5753888Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeStreamStateHandle(MetadataV2V3SerializerBase.java:727) > 2022-08-10T02:48:19.5754918Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeOperatorStateHandle(MetadataV2V3SerializerBase.java:630) > 2022-08-10T02:48:19.5755929Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeSubtaskState(MetadataV2V3SerializerBase.java:279) > 2022-08-10T02:48:19.5756887Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV3Serializer.deserializeOperatorState(MetadataV3Serializer.java:184) > 2022-08-10T02:48:19.5757815Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase.deserializeMetadata(MetadataV2V3SerializerBase.java:182) > 2022-08-10T02:48:19.5758890Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV3Serializer.deserialize(MetadataV3Serializer.java:90) > 2022-08-10T02:48:19.5759762Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV4Serializer.deserialize(MetadataV4Serializer.java:49) > 2022-08-10T02:48:19.5760584Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.Checkpoints.loadCheckpointMetadata(Checkpoints.java:118) > 2022-08-10T02:48:19.5761414Z Aug 10 02:48:19 at > org.apache.flink.test.util.TestUtils.loadCheckpointMetadata(TestUtils.java:103) > 2022-08-10T02:48:19.5762243Z Aug 10 02:48:19 at > org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase.getAllStateHandleId(ChangelogRecoveryITCaseBase.java:233) > 2022-08-10T02:48:19.5763361Z Aug 10 02:48:19 at > org.apache.flink.test.checkpointing.ChangelogRecoverySwitchEnvTestBase$2.lambda$beforeElement$cdc83b0a$1(ChangelogRecoverySwitchEnvTestBase.java:120) > 2022-08-10T02:48:19.5764755Z Aug 10 02:48:19 at > org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase$ControlledSource.waitWhile(ChangelogRecoveryITCaseBase.java:359) > 2022-08-10T02:48:19.5765732Z Aug 10 02:48:19 at > org.apache.flink.test.checkpointing.ChangelogRecoverySwitchEnvTestBase$2.beforeElement(ChangelogRecoverySwitchEnvTestBase.java:117) > 2022-08-10T02:48:19.5766685Z Aug 10 02:48:19 at > org.apache.flink.test.checkpointing.ChangelogRecoveryITCaseBase$ControlledSource.run(ChangelogRecoveryITCaseBase.java:342) > 2022-08-10T02:48:19.5767503Z Aug 10 02:48:19 at > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:110) > 2022-08-10T02:48:19.5768234Z Aug 10 02:48:19 at > org.apache.flink.streaming.api.operators.StreamSource.run(StreamSource.java:67) > 2022-08-10T02:48:19.5769041Z Aug 10 02:48:19 at > org.apache.flink.streaming.runtime.tasks.SourceStreamTask$LegacySourceFunctionThread.run(SourceStreamTask.java:333) > 2022-08-10T02:48:19.5771138Z Aug 10 02:48:19 Caused by: > java.io.FileNotFoundException: Cannot find meta data file '_metadata' in > directory > 'file:/tmp/junit8987764363790519654/junit1534389645319798457/0d8e7372aaeb10c57565dae19d6f6ed6/chk-4'. > Please try to load the checkpoint/savepoint directly from the metadata file > instead of the directory. > 2022-08-10T02:48:19.5772448Z Aug 10 02:48:19 at > org.apache.flink.runtime.state.filesystem.AbstractFsCheckpointStorageAccess.resolveCheckpointPointer(AbstractFsCheckpointStorageAccess.java:290) > 2022-08-10T02:48:19.5773761Z Aug 10 02:48:19 at > org.apache.flink.runtime.checkpoint.metadata.MetadataV2V3SerializerBase$DeserializationContext.createExclusiveDirPath(MetadataV2V3SerializerBase.java:872) > 2022-08-10T02:48:19.5774662Z Aug 10 02:48:19 ... 18 more {code} > https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=39795&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=86f654fa-ab48-5c1a-25f4-7e7f6afb9bba -- This message was sent by Atlassian Jira (v8.20.10#820010)