Anton Kalashnikov created IGNITE-12594: ------------------------------------------
Summary: Deadlock between GridCacheDataStore#purgeExpiredInternal and GridNearTxLocal#enlistWriteEntry Key: IGNITE-12594 URL: https://issues.apache.org/jira/browse/IGNITE-12594 Project: Ignite Issue Type: Bug Reporter: Anton Kalashnikov Assignee: Anton Kalashnikov The deadlock reproduces occasionally in the PDS 3 suite and can be seen in the thread dump below. One thread attempts to unwind evicts: it acquires the checkpoint read lock and then tries to lock a {{GridCacheMapEntry}}. Another thread calls {{GridCacheMapEntry#unswap}}, which locks the entry, determines that the entry is expired, and then tries to acquire the checkpoint read lock in order to remove the entry from the store. The two threads therefore acquire the entry lock and the checkpoint read lock in opposite order. We should not acquire the checkpoint read lock while holding the lock on a {{GridCacheMapEntry}}. {code:java}Thread [name="updater-1", id=29900, state=WAITING, blockCnt=2, waitCnt=4450] Lock [object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@2fc51685, ownerName=null, ownerId=-1] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283) at java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727) at o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1632) <- CP read lock at o.a.i.i.processors.cache.GridCacheMapEntry.onExpired(GridCacheMapEntry.java:4081) at o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:559) at o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:519) <- locked entry at o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWriteEntry(GridNearTxLocal.java:1437) at 
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWrite(GridNearTxLocal.java:1303) at o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync0(GridNearTxLocal.java:957) at o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync(GridNearTxLocal.java:491) at o.a.i.i.processors.cache.GridCacheAdapter$29.inOp(GridCacheAdapter.java:2526) at o.a.i.i.processors.cache.GridCacheAdapter$SyncInOp.op(GridCacheAdapter.java:4727) at o.a.i.i.processors.cache.GridCacheAdapter.syncOp(GridCacheAdapter.java:3740) at o.a.i.i.processors.cache.GridCacheAdapter.putAll0(GridCacheAdapter.java:2524) at o.a.i.i.processors.cache.GridCacheAdapter.putAll(GridCacheAdapter.java:2513) at o.a.i.i.processors.cache.IgniteCacheProxyImpl.putAll(IgniteCacheProxyImpl.java:1264) at o.a.i.i.processors.cache.GatewayProtectedCacheProxy.putAll(GatewayProtectedCacheProxy.java:863) at o.a.i.i.processors.cache.persistence.IgnitePdsContinuousRestartTest$1.call(IgnitePdsContinuousRestartTest.java:291) at o.a.i.testframework.GridTestThread.run(GridTestThread.java:83) Locked synchronizers: java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7 Thread [name="sys-stripe-0-#24086%persistence.IgnitePdsContinuousRestartTestWithExpiryPolicy0%", id=29617, state=WAITING, blockCnt=2, waitCnt=65381] Lock [object=java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7, ownerName=updater-1, ownerId=29900] at sun.misc.Unsafe.park(Native Method) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) at 
java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) <- lock entry at o.a.i.i.processors.cache.GridCacheMapEntry.lockEntry(GridCacheMapEntry.java:5017) at o.a.i.i.processors.cache.GridCacheMapEntry.markObsoleteVersion(GridCacheMapEntry.java:2799) at o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.removeVersionedEntry(GridDhtLocalPartition.java:392) at o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.cleanupRemoveQueue(GridDhtLocalPartition.java:416) at o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.onDeferredDelete(GridDhtLocalPartition.java:441) at o.a.i.i.processors.cache.distributed.dht.GridDhtCacheAdapter.onDeferredDelete(GridDhtCacheAdapter.java:1696) at o.a.i.i.processors.cache.GridCacheContext.onDeferredDelete(GridCacheContext.java:1710) at o.a.i.i.processors.cache.GridCacheMapEntry.onTtlExpired(GridCacheMapEntry.java:4037) at o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:75) at o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:66) at o.a.i.i.util.lang.IgniteInClosure2X.apply(IgniteInClosure2X.java:37) at o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpiredInternal(GridCacheOffheapManager.java:2725) <- CP read lock at o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpired(GridCacheOffheapManager.java:2651) at o.a.i.i.processors.cache.persistence.GridCacheOffheapManager.expire(GridCacheOffheapManager.java:1047) at o.a.i.i.processors.cache.GridCacheTtlManager.expire(GridCacheTtlManager.java:242) at o.a.i.i.processors.cache.GridCacheUtils.unwindEvicts(GridCacheUtils.java:874) at o.a.i.i.processors.cache.transactions.IgniteTxStateImpl.unwindEvicts(IgniteTxStateImpl.java:106) at o.a.i.i.processors.cache.GridCacheIoManager.onMessageProcessed(GridCacheIoManager.java:1182) at 
o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1161) at o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591) at o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392) at o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318) at o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109) at o.a.i.i.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:308) at o.a.i.i.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1607) at o.a.i.i.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1231) at o.a.i.i.managers.communication.GridIoManager.access$4300(GridIoManager.java:132) at o.a.i.i.managers.communication.GridIoManager$8.run(GridIoManager.java:1124) at o.a.i.i.util.StripedExecutor$Stripe.body(StripedExecutor.java:559) at o.a.i.i.util.worker.GridWorker.run(GridWorker.java:119) at java.lang.Thread.run(Thread.java:748){code} Reproduced by PDS 3 [https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=<default>|https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=%3Cdefault%3E] -- This message was sent by Atlassian Jira (v8.3.4#803005)