[ 
https://issues.apache.org/jira/browse/FLINK-25981?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17490781#comment-17490781
 ] 

Till Rohrmann commented on FLINK-25981:
---------------------------------------

The new stack trace is:

{code}
2022-02-10T13:16:59.3317243Z Feb 10 13:16:59 [ERROR] 
org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers
  Time elapsed: 1,711.883 s  <<< FAILURE!
2022-02-10T13:16:59.3318299Z Feb 10 13:16:59 java.lang.AssertionError: 
2022-02-10T13:16:59.3319424Z Feb 10 13:16:59 
2022-02-10T13:16:59.3319771Z Feb 10 13:16:59 Expected size: 1 but was: 0 in:
2022-02-10T13:16:59.3323259Z Feb 10 13:16:59 []
2022-02-10T13:16:59.3324585Z Feb 10 13:16:59    at 
org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers(ZooKeeperMultipleComponentLeaderElectionDriverTest.java:264)
2022-02-10T13:16:59.3325873Z Feb 10 13:16:59    at 
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2022-02-10T13:16:59.3326685Z Feb 10 13:16:59    at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2022-02-10T13:16:59.3327320Z Feb 10 13:16:59    at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2022-02-10T13:16:59.3327890Z Feb 10 13:16:59    at 
java.lang.reflect.Method.invoke(Method.java:498)
2022-02-10T13:16:59.3328509Z Feb 10 13:16:59    at 
org.junit.platform.commons.util.ReflectionUtils.invokeMethod(ReflectionUtils.java:725)
2022-02-10T13:16:59.3329147Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.MethodInvocation.proceed(MethodInvocation.java:60)
2022-02-10T13:16:59.3329864Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.InvocationInterceptorChain$ValidatingInvocation.proceed(InvocationInterceptorChain.java:131)
2022-02-10T13:16:59.3336949Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.extension.TimeoutExtension.intercept(TimeoutExtension.java:149)
2022-02-10T13:16:59.3337933Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestableMethod(TimeoutExtension.java:140)
2022-02-10T13:16:59.3338887Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.extension.TimeoutExtension.interceptTestMethod(TimeoutExtension.java:84)
2022-02-10T13:16:59.3339947Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.ExecutableInvoker$ReflectiveInterceptorCall.lambda$ofVoidMethod$0(ExecutableInvoker.java:115)
2022-02-10T13:16:59.3341415Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.ExecutableInvoker.lambda$invoke$0(ExecutableInvoker.java:105)
2022-02-10T13:16:59.3342564Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.InvocationInterceptorChain$InterceptedInvocation.proceed(InvocationInterceptorChain.java:106)
2022-02-10T13:16:59.3343672Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.InvocationInterceptorChain.proceed(InvocationInterceptorChain.java:64)
2022-02-10T13:16:59.3344663Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.InvocationInterceptorChain.chainAndInvoke(InvocationInterceptorChain.java:45)
2022-02-10T13:16:59.3345610Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.InvocationInterceptorChain.invoke(InvocationInterceptorChain.java:37)
2022-02-10T13:16:59.3346328Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:104)
2022-02-10T13:16:59.3346995Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.execution.ExecutableInvoker.invoke(ExecutableInvoker.java:98)
2022-02-10T13:16:59.3347721Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.lambda$invokeTestMethod$7(TestMethodTestDescriptor.java:214)
2022-02-10T13:16:59.3348465Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3349172Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.invokeTestMethod(TestMethodTestDescriptor.java:210)
2022-02-10T13:16:59.3349892Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:135)
2022-02-10T13:16:59.3350599Z Feb 10 13:16:59    at 
org.junit.jupiter.engine.descriptor.TestMethodTestDescriptor.execute(TestMethodTestDescriptor.java:66)
2022-02-10T13:16:59.3351388Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$6(NodeTestTask.java:151)
2022-02-10T13:16:59.3352468Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3353204Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:141)
2022-02-10T13:16:59.3353880Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
2022-02-10T13:16:59.3354525Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$9(NodeTestTask.java:139)
2022-02-10T13:16:59.3355252Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3356005Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:138)
2022-02-10T13:16:59.3356684Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:95)
2022-02-10T13:16:59.3357247Z Feb 10 13:16:59    at 
java.util.ArrayList.forEach(ArrayList.java:1259)
2022-02-10T13:16:59.3357944Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.invokeAll(SameThreadHierarchicalTestExecutorService.java:41)
2022-02-10T13:16:59.3358766Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$6(NodeTestTask.java:155)
2022-02-10T13:16:59.3359473Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3360165Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:141)
2022-02-10T13:16:59.3360829Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
2022-02-10T13:16:59.3361869Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$9(NodeTestTask.java:139)
2022-02-10T13:16:59.3363045Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3364056Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:138)
2022-02-10T13:16:59.3364758Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:95)
2022-02-10T13:16:59.3365503Z Feb 10 13:16:59    at 
java.util.ArrayList.forEach(ArrayList.java:1259)
2022-02-10T13:16:59.3366185Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.invokeAll(SameThreadHierarchicalTestExecutorService.java:41)
2022-02-10T13:16:59.3367003Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$6(NodeTestTask.java:155)
2022-02-10T13:16:59.3367726Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3368443Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$8(NodeTestTask.java:141)
2022-02-10T13:16:59.3369084Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.Node.around(Node.java:137)
2022-02-10T13:16:59.3369738Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.lambda$executeRecursively$9(NodeTestTask.java:139)
2022-02-10T13:16:59.3370449Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.ThrowableCollector.execute(ThrowableCollector.java:73)
2022-02-10T13:16:59.3371197Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.executeRecursively(NodeTestTask.java:138)
2022-02-10T13:16:59.3371882Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.NodeTestTask.execute(NodeTestTask.java:95)
2022-02-10T13:16:59.3373023Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.SameThreadHierarchicalTestExecutorService.submit(SameThreadHierarchicalTestExecutorService.java:35)
2022-02-10T13:16:59.3373845Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestExecutor.execute(HierarchicalTestExecutor.java:57)
2022-02-10T13:16:59.3374568Z Feb 10 13:16:59    at 
org.junit.platform.engine.support.hierarchical.HierarchicalTestEngine.execute(HierarchicalTestEngine.java:54)
2022-02-10T13:16:59.3375288Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:107)
2022-02-10T13:16:59.3376002Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:88)
2022-02-10T13:16:59.3376730Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.EngineExecutionOrchestrator.lambda$execute$0(EngineExecutionOrchestrator.java:54)
2022-02-10T13:16:59.3377474Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.EngineExecutionOrchestrator.withInterceptedStreams(EngineExecutionOrchestrator.java:67)
2022-02-10T13:16:59.3378269Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.EngineExecutionOrchestrator.execute(EngineExecutionOrchestrator.java:52)
2022-02-10T13:16:59.3378941Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:114)
2022-02-10T13:16:59.3379562Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.DefaultLauncher.execute(DefaultLauncher.java:86)
2022-02-10T13:16:59.3380230Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.DefaultLauncherSession$DelegatingLauncher.execute(DefaultLauncherSession.java:86)
2022-02-10T13:16:59.3380947Z Feb 10 13:16:59    at 
org.junit.platform.launcher.core.SessionPerRequestLauncher.execute(SessionPerRequestLauncher.java:53)
2022-02-10T13:16:59.3381709Z Feb 10 13:16:59    at 
org.apache.maven.surefire.junitplatform.JUnitPlatformProvider.execute(JUnitPlatformProvider.java:188)
2022-02-10T13:16:59.3382587Z Feb 10 13:16:59    at 
org.apache.maven.surefire.junitplatform.JUnitPlatformProvider.invokeAllTests(JUnitPlatformProvider.java:154)
2022-02-10T13:16:59.3383395Z Feb 10 13:16:59    at 
org.apache.maven.surefire.junitplatform.JUnitPlatformProvider.invoke(JUnitPlatformProvider.java:124)
2022-02-10T13:16:59.3384064Z Feb 10 13:16:59    at 
org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:428)
2022-02-10T13:16:59.3384688Z Feb 10 13:16:59    at 
org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162)
2022-02-10T13:16:59.3385266Z Feb 10 13:16:59    at 
org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:562)
2022-02-10T13:16:59.3385852Z Feb 10 13:16:59    at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:548)
{code}

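A potential reason could be that the current leader loses leadership before we 
obtain the {{leaderAndRest}} map. In that case no driver would be reported as 
leader, which would explain the empty collection in the assertion above 
(expected size 1, but was 0). This could be caused by an unstable ZooKeeper 
testing server.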

> ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers
>  failed
> -----------------------------------------------------------------------------------------------
>
>                 Key: FLINK-25981
>                 URL: https://issues.apache.org/jira/browse/FLINK-25981
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Coordination
>    Affects Versions: 1.15.0
>            Reporter: Matthias Pohl
>            Priority: Major
>              Labels: test-stability
>
> We experienced a [build 
> failure|https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=30783&view=logs&j=a57e0635-3fad-5b08-57c7-a4142d7d6fa9&t=2ef0effc-1da1-50e5-c2bd-aab434b1c5b7&l=15997]
>  in 
> {{ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers}}.
>  The test hung while waiting for the next leader in 
> [ZooKeeperMultipleComponentLeaderElectionDriverTest:256|https://github.com/apache/flink/blob/e8742f7f5cac34852d0e621036e1614bbdfe8ec3/flink-runtime/src/test/java/org/apache/flink/runtime/leaderelection/ZooKeeperMultipleComponentLeaderElectionDriverTest.java#L256]
> {code}
> Feb 04 18:02:54 "main" #1 prio=5 os_prio=0 tid=0x00007fab0800b800 nid=0xe07 
> waiting on condition [0x00007fab12574000]
> Feb 04 18:02:54    java.lang.Thread.State: WAITING (parking)
> Feb 04 18:02:54       at sun.misc.Unsafe.park(Native Method)
> Feb 04 18:02:54       - parking to wait for  <0x000000008065c5c8> (a 
> java.util.concurrent.CompletableFuture$Signaller)
> Feb 04 18:02:54       at 
> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
> Feb 04 18:02:54       at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
> Feb 04 18:02:54       at 
> java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> Feb 04 18:02:54       at 
> java.util.concurrent.CompletableFuture.waitingGet(CompletableFuture.java:1742)
> Feb 04 18:02:54       at 
> java.util.concurrent.CompletableFuture.join(CompletableFuture.java:1947)
> Feb 04 18:02:54       at 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers(ZooKeeperMultipleComponentLeaderElectionDriverTest.java:256)
> [...]
> {code}
> The extended Maven logs indicate that the timeout happened while waiting for 
> the second leader to be selected.
> {code}
> Test 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriverTest.testLeaderElectionWithMultipleDrivers
>  is running.
> --------------------------------------------------------------------------------
> 17:15:10,437 [           Thread-16] INFO  
> org.apache.curator.test.TestingZooKeeperMain                 [] - Starting 
> server
> 17:15:10,450 [                main] INFO  
> org.apache.flink.runtime.util.ZooKeeperUtils                 [] - Enforcing 
> default ACL for ZK connections
> 17:15:10,451 [                main] INFO  
> org.apache.flink.runtime.util.ZooKeeperUtils                 [] - Using 
> '/flink/default' as Zookeeper namespace.
> 17:15:10,452 [                main] INFO  
> org.apache.flink.shaded.curator5.org.apache.curator.framework.imps.CuratorFrameworkImpl
>  [] - Starting
> 17:15:10,455 [                main] INFO  
> org.apache.flink.shaded.curator5.org.apache.curator.framework.imps.CuratorFrameworkImpl
>  [] - Default schema
> 17:15:10,462 [    main-EventThread] INFO  
> org.apache.flink.shaded.curator5.org.apache.curator.framework.state.ConnectionStateManager
>  [] - State change: CONNECTED
> 17:15:10,467 [    main-EventThread] INFO  
> org.apache.flink.shaded.curator5.org.apache.curator.framework.imps.EnsembleTracker
>  [] - New config event received: {}
> 17:15:10,482 [Curator-ConnectionStateManager-0] DEBUG 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriver
>  [] - Connected to ZooKeeper quorum. Leader election can start.
> 17:15:10,483 [Curator-ConnectionStateManager-0] DEBUG 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriver
>  [] - Connected to ZooKeeper quorum. Leader election can start.
> 17:15:10,483 [Curator-ConnectionStateManager-0] DEBUG 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriver
>  [] - Connected to ZooKeeper quorum. Leader election can start.
> 17:15:10,484 [    main-EventThread] INFO  
> org.apache.flink.shaded.curator5.org.apache.curator.framework.imps.EnsembleTracker
>  [] - New config event received: {}
> 17:15:10,562 [    main-EventThread] DEBUG 
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriver
>  [] - ZooKeeperMultipleComponentLeaderElectionDriver obtained the leadership.
> 17:15:10,600 [                main] INFO  
> org.apache.flink.runtime.leaderelection.ZooKeeperMultipleComponentLeaderElectionDriver
>  [] - Closing ZooKeeperMultipleComponentLeaderElectionDriver.
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to