Denis Chudov created IGNITE-27215:
-------------------------------------
Summary: Critical system error caused by HandshakeException on cluster stop
Key: IGNITE-27215
URL: https://issues.apache.org/jira/browse/IGNITE-27215
Project: Ignite
Issue Type: Bug
Reporter: Denis Chudov
Assignee: Denis Chudov
Stack trace:
{code:java}
[2025-11-28T01:43:08,961][ERROR][ForkJoinPool.commonPool-worker-6][FailureManager]
Critical system error detected. Will be handled accordingly to configured
handler [hnd=NoOpFailureHandler [super=AbstractFailureHandler
[ignoredFailureTypes=UnmodifiableSet [SYSTEM_WORKER_BLOCKED,
SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=CRITICAL_ERROR,
failureCtxId=506fde4f-f78c-46f1-847f-857f09b72187]
org.apache.ignite.internal.failure.StackTraceCapturingException: Unable to
clean up zones resources
at
org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:184)01:43:09
at
org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:161)01:43:09
at
org.apache.ignite.internal.partition.replicator.PartitionReplicaLifecycleManager.cleanUpPartitionsResources(PartitionReplicaLifecycleManager.java:1701)01:43:09
at
org.apache.ignite.internal.partition.replicator.PartitionReplicaLifecycleManager.beforeNodeStop(PartitionReplicaLifecycleManager.java:853)01:43:09
at
org.apache.ignite.internal.app.LifecycleManager.initiateAllComponentsStop(LifecycleManager.java:168)01:43:09
at
org.apache.ignite.internal.app.LifecycleManager.stopNode(LifecycleManager.java:148)01:43:09
at
org.apache.ignite.internal.app.IgniteImpl.stopAsync(IgniteImpl.java:2326)01:43:09
at
org.apache.ignite.internal.app.IgniteServerImpl.doShutdownAsync(IgniteServerImpl.java:320)01:43:09
at
org.apache.ignite.internal.app.IgniteServerImpl.lambda$chainRestartOrShutdownAction$5(IgniteServerImpl.java:259)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.uniComposeStage(CompletableFuture.java:1187)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.thenCompose(CompletableFuture.java:2341)01:43:09
at
org.apache.ignite.internal.app.IgniteServerImpl.chainRestartOrShutdownAction(IgniteServerImpl.java:259)01:43:09
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdownAsync(IgniteServerImpl.java:296)01:43:09
at
org.apache.ignite.internal.app.IgniteServerImpl.shutdown(IgniteServerImpl.java:336)01:43:09
at
java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)01:43:09
at
java.base/java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:179)01:43:09
at
java.base/java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:179)01:43:09
at
java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1708)01:43:09
at
java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)01:43:09
at
java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:291)01:43:09
at
java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754)01:43:09
at
java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:387)01:43:09
at
java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1312)01:43:09
at
java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1843)01:43:09
at
java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1808)01:43:09
at
java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:188)01:43:09
Caused by: java.util.concurrent.ExecutionException:
java.lang.IllegalStateException:
org.apache.ignite.internal.network.handshake.HandshakeException:
irast_trfdl_3347:4973738b-d432-4d2a-94d5-103c61c7eafb is stale, node should be
restarted so that other nodes can connect
at
java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2073)01:43:09
at
org.apache.ignite.internal.partition.replicator.PartitionReplicaLifecycleManager.cleanUpPartitionsResources(PartitionReplicaLifecycleManager.java:1699)01:43:09
... 23 more
Caused by: java.lang.IllegalStateException:
org.apache.ignite.internal.network.handshake.HandshakeException:
irast_trfdl_3347:4973738b-d432-4d2a-94d5-103c61c7eafb is stale, node should be
restarted so that other nodes can connect
at
org.apache.ignite.internal.partition.replicator.raft.snapshot.incoming.IncomingSnapshotCopier.join(IncomingSnapshotCopier.java:245)01:43:09
at
org.apache.ignite.internal.partition.replicator.raft.snapshot.incoming.IncomingSnapshotCopier.cancel(IncomingSnapshotCopier.java:272)01:43:09
at
org.apache.ignite.raft.jraft.storage.snapshot.SnapshotExecutorImpl.interruptDownloadingSnapshots(SnapshotExecutorImpl.java:680)01:43:09
at
org.apache.ignite.raft.jraft.core.NodeImpl.stepDown(NodeImpl.java:1599)01:43:09
at
org.apache.ignite.raft.jraft.core.NodeImpl.shutdown(NodeImpl.java:3364)01:43:09
at
org.apache.ignite.raft.jraft.RaftGroupService.shutdown(RaftGroupService.java:125)01:43:09
at
org.apache.ignite.internal.raft.server.impl.JraftServerImpl.stopRaftNodes(JraftServerImpl.java:595)01:43:09
at
org.apache.ignite.internal.raft.Loza.stopRaftNodes(Loza.java:626)01:43:09
at
org.apache.ignite.internal.replicator.ReplicaManager.lambda$stopReplicaInternal$28(ReplicaManager.java:1011)01:43:09
at
java.base/java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:646)01:43:09
at
java.base/java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:482)01:43:09
at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572)01:43:09
at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)01:43:09
at
java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)01:43:09
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)01:43:09
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)01:43:09
at java.base/java.lang.Thread.run(Thread.java:1583)
Caused by: org.apache.ignite.internal.network.handshake.HandshakeException:
irast_trfdl_3347:4973738b-d432-4d2a-94d5-103c61c7eafb is stale, node should be
restarted so that other nodes can connect
at
org.apache.ignite.internal.network.recovery.HandshakeManagerUtils.lambda$sendRejectionMessageAndFailHandshake$0(HandshakeManagerUtils.java:74)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:863)01:43:09
at
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:841)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:510)01:43:09
at
java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2179)01:43:09
at
org.apache.ignite.internal.network.netty.NettyUtils.lambda$toCompletableFuture$0(NettyUtils.java:62)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)01:43:09
at
io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)01:43:09
at
io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)01:43:09
at
io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)01:43:09
at
io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:84)01:43:09
at
io.netty.handler.stream.ChunkedWriteHandler$PendingWrite.success(ChunkedWriteHandler.java:392)01:43:09
at
io.netty.handler.stream.ChunkedWriteHandler.handleEndOfInputFuture(ChunkedWriteHandler.java:348)01:43:09
at
io.netty.handler.stream.ChunkedWriteHandler.access$100(ChunkedWriteHandler.java:70)01:43:09
at
io.netty.handler.stream.ChunkedWriteHandler$2.operationComplete(ChunkedWriteHandler.java:303)01:43:09
at
io.netty.handler.stream.ChunkedWriteHandler$2.operationComplete(ChunkedWriteHandler.java:300)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)01:43:09
at
io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)01:43:09
at
io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)01:43:09
at
io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)01:43:09
at
io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)01:43:09
at
io.netty.util.internal.PromiseNotificationUtil.trySuccess(PromiseNotificationUtil.java:48)01:43:09
at
io.netty.channel.ChannelOutboundBuffer.safeSuccess(ChannelOutboundBuffer.java:747)01:43:09
at
io.netty.channel.ChannelOutboundBuffer.remove(ChannelOutboundBuffer.java:302)01:43:09
at
io.netty.channel.ChannelOutboundBuffer.removeBytes(ChannelOutboundBuffer.java:382)01:43:09
at
io.netty.channel.socket.nio.NioSocketChannel.doWrite(NioSocketChannel.java:414)01:43:09
at
io.netty.channel.AbstractChannel$AbstractUnsafe.flush0(AbstractChannel.java:802)01:43:09
at
io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.flush0(AbstractNioChannel.java:404)01:43:09
at
io.netty.channel.AbstractChannel$AbstractUnsafe.flush(AbstractChannel.java:766)01:43:09
at
io.netty.channel.DefaultChannelPipeline$HeadContext.flush(DefaultChannelPipeline.java:1391)01:43:09
at
io.netty.channel.AbstractChannelHandlerContext.invokeFlush0(AbstractChannelHandlerContext.java:789)01:43:09
at
io.netty.channel.AbstractChannelHandlerContext.invokeFlush(AbstractChannelHandlerContext.java:775)01:43:09
at
io.netty.channel.AbstractChannelHandlerContext.flush(AbstractChannelHandlerContext.java:761)01:43:09
at
io.netty.handler.flush.FlushConsolidationHandler.flushNow(FlushConsolidationHandler.java:204)01:43:09
at
io.netty.handler.flush.FlushConsolidationHandler.flushIfNeeded(FlushConsolidationHandler.java:197)01:43:09
at
io.netty.handler.flush.FlushConsolidationHandler.resetReadAndFlushIfNeeded(FlushConsolidationHandler.java:192)01:43:09
at
io.netty.handler.flush.FlushConsolidationHandler.channelReadComplete(FlushConsolidationHandler.java:145)01:43:09
at
io.netty.channel.AbstractChannelHandlerContext.fireChannelReadComplete(AbstractChannelHandlerContext.java:384)01:43:09
at
io.netty.channel.DefaultChannelPipeline$HeadContext.channelReadComplete(DefaultChannelPipeline.java:1434)01:43:09
at
io.netty.channel.DefaultChannelPipeline.fireChannelReadComplete(DefaultChannelPipeline.java:932)01:43:09
at
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:173)01:43:09
at
io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.handle(AbstractNioChannel.java:445)01:43:09
at
io.netty.channel.nio.NioIoHandler$DefaultNioRegistration.handle(NioIoHandler.java:388)01:43:09
at
io.netty.channel.nio.NioIoHandler.processSelectedKey(NioIoHandler.java:596)01:43:09
at
io.netty.channel.nio.NioIoHandler.processSelectedKeysOptimized(NioIoHandler.java:571)01:43:09
at
io.netty.channel.nio.NioIoHandler.processSelectedKeys(NioIoHandler.java:512)01:43:09
at io.netty.channel.nio.NioIoHandler.run(NioIoHandler.java:484)01:43:09
at
io.netty.channel.SingleThreadIoEventLoop.runIo(SingleThreadIoEventLoop.java:225)01:43:09
at
io.netty.channel.SingleThreadIoEventLoop.run(SingleThreadIoEventLoop.java:196)01:43:09
at
io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:1193)01:43:09
at
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)01:43:09
at
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)01:43:09
... 1 more
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)