[ 
https://issues.apache.org/jira/browse/IGNITE-19662?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Aleksandr Polovtcev resolved IGNITE-19662.
------------------------------------------
    Resolution: Not A Bug

> Node from CMG restart failed
> ----------------------------
>
>                 Key: IGNITE-19662
>                 URL: https://issues.apache.org/jira/browse/IGNITE-19662
>             Project: Ignite
>          Issue Type: Bug
>            Reporter: Mikhail Pochatkin
>            Assignee: Aleksandr Polovtcev
>            Priority: Major
>              Labels: ignite-3
>
> Test case: 
>  # Start a cluster with 8 nodes where the CMG consists of nodes [0, 1, 2]
>  # Restart one node from the CMG (it doesn't matter whether it is the leader)
> Expectation: 
> The node restarts successfully and joins the cluster 
> Actual:
> The node failed to start after a 20-second timeout. Logs: 
> {code:java}
> [WARNING][CompletableFutureDelayScheduler][RaftGroupServiceImpl] Recoverable 
> error during the request type=ActionRequestImpl occurred (will be retried on 
> the randomly selected node): 
> java.util.concurrent.CompletionException: 
> java.util.concurrent.TimeoutException
>       at 
> java.base/java.util.concurrent.CompletableFuture.encodeRelay(CompletableFuture.java:367)
>       at 
> java.base/java.util.concurrent.CompletableFuture.completeRelay(CompletableFuture.java:376)
>       at 
> java.base/java.util.concurrent.CompletableFuture$UniRelay.tryFire(CompletableFuture.java:1019)
>       at 
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
>       at 
> java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)
>       at 
> java.base/java.util.concurrent.CompletableFuture$Timeout.run(CompletableFuture.java:2792)
>       at 
> java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
>       at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
>       at 
> java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>       at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>       at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.util.concurrent.TimeoutException
>       ... 7 more 
> {code}
>  
> {code:java}
> 2023-06-06 11:05:15:972 +0300 
> [WARNING][%iduft_tdwns_2%JRaft-Request-Processor-15][RaftGroupServiceImpl] 
> Recoverable error during the request type=ActionRequestImpl occurred (will be 
> retried on the randomly selected node): 
> java.util.concurrent.CompletionException: java.net.ConnectException: Peer 
> iduft_tdwns_0 is unavailable
>     at 
> java.base/java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:331)
>     at 
> java.base/java.util.concurrent.CompletableFuture.uniComposeStage(CompletableFuture.java:1099)
>     at 
> java.base/java.util.concurrent.CompletableFuture.thenCompose(CompletableFuture.java:2235)
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:520)
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:487)
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.run(RaftGroupServiceImpl.java:454)
>     at 
> org.apache.ignite.internal.cluster.management.raft.CmgRaftService.validatedNodes(CmgRaftService.java:213)
>     at 
> java.base/java.util.concurrent.CompletableFuture.uniComposeStage(CompletableFuture.java:1106)
>     at 
> java.base/java.util.concurrent.CompletableFuture.thenCompose(CompletableFuture.java:2235)
>     at 
> org.apache.ignite.internal.cluster.management.ClusterManagementGroupManager.validatedNodes(ClusterManagementGroupManager.java:827)
>     at 
> org.apache.ignite.internal.cluster.management.topology.LogicalTopologyServiceImpl.validatedNodesOnLeader(LogicalTopologyServiceImpl.java:58)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$removeLearner$10(MetaStorageRaftGroupEventsListener.java:194)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.updateConfigUnderLock(MetaStorageRaftGroupEventsListener.java:237)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.removeLearner(MetaStorageRaftGroupEventsListener.java:194)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener$1.lambda$onNodeInvalidated$1(MetaStorageRaftGroupEventsListener.java:117)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$executeIfLeaderImpl$2(MetaStorageRaftGroupEventsListener.java:170)
>     at 
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$executeWithStatus$3(MetaStorageRaftGroupEventsListener.java:179)
>     at 
> java.base/java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:1072)
>     at 
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
>     at 
> java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$39(RaftGroupServiceImpl.java:539)
>     at 
> java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)
>     at 
> java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)
>     at 
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
>     at 
> java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
>     at 
> org.apache.ignite.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:371)
>     at 
> org.apache.ignite.network.DefaultMessagingService.send0(DefaultMessagingService.java:194)
>     at 
> org.apache.ignite.network.DefaultMessagingService.respond(DefaultMessagingService.java:137)
>     at 
> org.apache.ignite.network.MessagingService.respond(MessagingService.java:89)
>     at 
> org.apache.ignite.raft.jraft.rpc.impl.IgniteRpcServer$NetworkRpcContext.sendResponse(IgniteRpcServer.java:233)
>     at 
> org.apache.ignite.raft.jraft.rpc.RpcRequestProcessor.handleRequest(RpcRequestProcessor.java:52)
>     at 
> org.apache.ignite.raft.jraft.rpc.RpcRequestProcessor.handleRequest(RpcRequestProcessor.java:29)
>     at 
> org.apache.ignite.raft.jraft.rpc.impl.IgniteRpcServer$RpcMessageHandler.lambda$onReceived$0(IgniteRpcServer.java:192)
>     at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>     at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>     at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.net.ConnectException: Peer iduft_tdwns_0 is unavailable
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.resolvePeer(RaftGroupServiceImpl.java:752)
>     at 
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:519)
>     ... 32 more {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to