[ https://issues.apache.org/jira/browse/FLINK-34272?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17812379#comment-17812379 ]
Matthias Pohl commented on FLINK-34272: --------------------------------------- I checked the logs for the build linked in the Jira description. It looks like a more serious issue where a state transition is triggered while another state transition is still in progress. I'm going to raise the priority to Blocker. {code:java} 17:21:28,764 [flink-pekko.actor.default-dispatcher-11] WARN org.apache.flink.runtime.minicluster.MiniCluster [] - Error in MiniCluster. Shutting the MiniCluster down. java.lang.IllegalStateException: State transitions must not be triggered while another state transition is in progress. at org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) ~[flink-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.transitionToState(AdaptiveScheduler.java:1376) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.goToRestarting(AdaptiveScheduler.java:1068) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.Executing.maybeRescale(Executing.java:178) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.Executing.rescaleWhenCooldownPeriodIsOver(Executing.java:207) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.Executing.<init>(Executing.java:91) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.Executing$Factory.getState(Executing.java:331) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.Executing$Factory.getState(Executing.java:283) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.transitionToState(AdaptiveScheduler.java:1394) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.goToExecuting(AdaptiveScheduler.java:1019) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.CreatingExecutionGraph.handleExecutionGraphCreation(CreatingExecutionGraph.java:140) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.CreatingExecutionGraph.lambda$null$0(CreatingExecutionGraph.java:81) ~[classes/:?] 
at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.runIfState(AdaptiveScheduler.java:1345) ~[classes/:?] at org.apache.flink.runtime.scheduler.adaptive.AdaptiveScheduler.lambda$runIfState$30(AdaptiveScheduler.java:1360) ~[classes/:?] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_292] at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_292] at org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.lambda$handleRunAsync$4(PekkoRpcActor.java:451) ~[flink-rpc-akkabde5885c-cf2c-492a-ad84-375c89ad3c43.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.concurrent.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleRunAsync(PekkoRpcActor.java:451) ~[flink-rpc-akkabde5885c-cf2c-492a-ad84-375c89ad3c43.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleRpcMessage(PekkoRpcActor.java:218) ~[flink-rpc-akkabde5885c-cf2c-492a-ad84-375c89ad3c43.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.rpc.pekko.FencedPekkoRpcActor.handleRpcMessage(FencedPekkoRpcActor.java:85) ~[flink-rpc-akkabde5885c-cf2c-492a-ad84-375c89ad3c43.jar:1.19-SNAPSHOT] at org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleMessage(PekkoRpcActor.java:168) ~[flink-rpc-akkabde5885c-cf2c-492a-ad84-375c89ad3c43.jar:1.19-SNAPSHOT] [...]{code} > AdaptiveSchedulerClusterITCase failure due to MiniCluster not running > --------------------------------------------------------------------- > > Key: FLINK-34272 > URL: https://issues.apache.org/jira/browse/FLINK-34272 > Project: Flink > Issue Type: Bug > Components: Runtime / Coordination > Affects Versions: 1.19.0 > Reporter: Matthias Pohl > Priority: Critical > Labels: test-stability > > [https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=57073&view=logs&j=0da23115-68bb-5dcd-192c-bd4c8adebde1&t=24c3384f-1bcb-57b3-224f-51bf973bbee8&l=9543] > 
{code:java} > Jan 29 17:21:29 17:21:29.465 [ERROR] Tests run: 3, Failures: 0, Errors: 2, > Skipped: 0, Time elapsed: 12.48 s <<< FAILURE! -- in > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase > Jan 29 17:21:29 17:21:29.465 [ERROR] > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.testAutomaticScaleUp > -- Time elapsed: 8.599 s <<< ERROR! > Jan 29 17:21:29 java.lang.IllegalStateException: MiniCluster is not yet > running or has already been shut down. > Jan 29 17:21:29 at > org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.getDispatcherGatewayFuture(MiniCluster.java:1118) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.runDispatcherCommand(MiniCluster.java:991) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.getArchivedExecutionGraph(MiniCluster.java:840) > Jan 29 17:21:29 at > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.lambda$waitUntilParallelismForVertexReached$3(AdaptiveSchedulerClusterITCase.java:270) > Jan 29 17:21:29 at > org.apache.flink.runtime.testutils.CommonTestUtils.waitUntilCondition(CommonTestUtils.java:151) > Jan 29 17:21:29 at > org.apache.flink.runtime.testutils.CommonTestUtils.waitUntilCondition(CommonTestUtils.java:145) > Jan 29 17:21:29 at > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.waitUntilParallelismForVertexReached(AdaptiveSchedulerClusterITCase.java:265) > Jan 29 17:21:29 at > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.testAutomaticScaleUp(AdaptiveSchedulerClusterITCase.java:146) > Jan 29 17:21:29 at java.lang.reflect.Method.invoke(Method.java:498) > Jan 29 17:21:29 at > org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45) > Jan 29 17:21:29 > Jan 29 17:21:29 17:21:29.466 [ERROR] > 
org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.testCheckpointStatsPersistedAcrossRescale > -- Time elapsed: 2.036 s <<< ERROR! > Jan 29 17:21:29 java.lang.IllegalStateException: MiniCluster is not yet > running or has already been shut down. > Jan 29 17:21:29 at > org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.getDispatcherGatewayFuture(MiniCluster.java:1118) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.runDispatcherCommand(MiniCluster.java:991) > Jan 29 17:21:29 at > org.apache.flink.runtime.minicluster.MiniCluster.getExecutionGraph(MiniCluster.java:969) > Jan 29 17:21:29 at > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.lambda$testCheckpointStatsPersistedAcrossRescale$1(AdaptiveSchedulerClusterITCase.java:183) > Jan 29 17:21:29 at > org.apache.flink.runtime.testutils.CommonTestUtils.waitUntilCondition(CommonTestUtils.java:151) > Jan 29 17:21:29 at > org.apache.flink.runtime.testutils.CommonTestUtils.waitUntilCondition(CommonTestUtils.java:145) > Jan 29 17:21:29 at > org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerClusterITCase.testCheckpointStatsPersistedAcrossRescale(AdaptiveSchedulerClusterITCase.java:180) > Jan 29 17:21:29 at java.lang.reflect.Method.invoke(Method.java:498) > Jan 29 17:21:29 at > org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45){code} -- This message was sent by Atlassian Jira (v8.20.10#820010)