Anton Kalashnikov created IGNITE-12594:
------------------------------------------

             Summary: Deadlock between GridCacheDataStore#purgeExpiredInternal 
and GridNearTxLocal#enlistWriteEntry
                 Key: IGNITE-12594
                 URL: https://issues.apache.org/jira/browse/IGNITE-12594
             Project: Ignite
          Issue Type: Bug
            Reporter: Anton Kalashnikov
            Assignee: Anton Kalashnikov


The deadlock is reproduced occasionally in the PDS3 suite and can be seen in 
the thread dump below.
One thread attempts to unwind evicts, acquires the checkpoint read lock and 
then tries to lock a {{GridCacheMapEntry}}. Another thread calls 
{{GridCacheMapEntry#unswap}}, which locks the entry, determines that the entry 
is expired and then tries to acquire the checkpoint read lock to remove the 
entry from the store. The two threads therefore take the same two locks in 
opposite order.
We should not acquire the checkpoint read lock while holding the lock on a 
{{GridCacheMapEntry}}.

{code:java}Thread [name="updater-1", id=29900, state=WAITING, blockCnt=2, 
waitCnt=4450]
    Lock 
[object=java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@2fc51685, 
ownerName=null, ownerId=-1]
        at sun.misc.Unsafe.park(Native Method)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:967)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1283)
        at 
java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:727)
        at 
o.a.i.i.processors.cache.persistence.GridCacheDatabaseSharedManager.checkpointReadLock(GridCacheDatabaseSharedManager.java:1632)
   <- CP read lock
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.onExpired(GridCacheMapEntry.java:4081)
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:559)
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.unswap(GridCacheMapEntry.java:519)   
                                                   <- locked entry
        at 
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWriteEntry(GridNearTxLocal.java:1437)
        at 
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.enlistWrite(GridNearTxLocal.java:1303)
        at 
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync0(GridNearTxLocal.java:957)
        at 
o.a.i.i.processors.cache.distributed.near.GridNearTxLocal.putAllAsync(GridNearTxLocal.java:491)
        at 
o.a.i.i.processors.cache.GridCacheAdapter$29.inOp(GridCacheAdapter.java:2526)
        at 
o.a.i.i.processors.cache.GridCacheAdapter$SyncInOp.op(GridCacheAdapter.java:4727)
        at 
o.a.i.i.processors.cache.GridCacheAdapter.syncOp(GridCacheAdapter.java:3740)
        at 
o.a.i.i.processors.cache.GridCacheAdapter.putAll0(GridCacheAdapter.java:2524)
        at 
o.a.i.i.processors.cache.GridCacheAdapter.putAll(GridCacheAdapter.java:2513)
        at 
o.a.i.i.processors.cache.IgniteCacheProxyImpl.putAll(IgniteCacheProxyImpl.java:1264)
        at 
o.a.i.i.processors.cache.GatewayProtectedCacheProxy.putAll(GatewayProtectedCacheProxy.java:863)
        at 
o.a.i.i.processors.cache.persistence.IgnitePdsContinuousRestartTest$1.call(IgnitePdsContinuousRestartTest.java:291)
        at o.a.i.testframework.GridTestThread.run(GridTestThread.java:83)

    Locked synchronizers:
        java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7


Thread 
[name="sys-stripe-0-#24086%persistence.IgnitePdsContinuousRestartTestWithExpiryPolicy0%",
 id=29617, state=WAITING, blockCnt=2, waitCnt=65381]
    Lock [object=java.util.concurrent.locks.ReentrantLock$NonfairSync@762613f7, 
ownerName=updater-1, ownerId=29900]
        at sun.misc.Unsafe.park(Native Method)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
        at 
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
        at 
java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)           
                                                            <- lock entry
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.lockEntry(GridCacheMapEntry.java:5017)
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.markObsoleteVersion(GridCacheMapEntry.java:2799)
        at 
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.removeVersionedEntry(GridDhtLocalPartition.java:392)
        at 
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.cleanupRemoveQueue(GridDhtLocalPartition.java:416)
        at 
o.a.i.i.processors.cache.distributed.dht.topology.GridDhtLocalPartition.onDeferredDelete(GridDhtLocalPartition.java:441)
        at 
o.a.i.i.processors.cache.distributed.dht.GridDhtCacheAdapter.onDeferredDelete(GridDhtCacheAdapter.java:1696)
        at 
o.a.i.i.processors.cache.GridCacheContext.onDeferredDelete(GridCacheContext.java:1710)
        at 
o.a.i.i.processors.cache.GridCacheMapEntry.onTtlExpired(GridCacheMapEntry.java:4037)
        at 
o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:75)
        at 
o.a.i.i.processors.cache.GridCacheTtlManager$1.applyx(GridCacheTtlManager.java:66)
        at o.a.i.i.util.lang.IgniteInClosure2X.apply(IgniteInClosure2X.java:37)
        at 
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpiredInternal(GridCacheOffheapManager.java:2725)
     <- CP read lock
        at 
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.purgeExpired(GridCacheOffheapManager.java:2651)
        at 
o.a.i.i.processors.cache.persistence.GridCacheOffheapManager.expire(GridCacheOffheapManager.java:1047)
        at 
o.a.i.i.processors.cache.GridCacheTtlManager.expire(GridCacheTtlManager.java:242)
        at 
o.a.i.i.processors.cache.GridCacheUtils.unwindEvicts(GridCacheUtils.java:874)
        at 
o.a.i.i.processors.cache.transactions.IgniteTxStateImpl.unwindEvicts(IgniteTxStateImpl.java:106)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.onMessageProcessed(GridCacheIoManager.java:1182)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1161)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:591)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:392)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.handleMessage(GridCacheIoManager.java:318)
        at 
o.a.i.i.processors.cache.GridCacheIoManager.access$100(GridCacheIoManager.java:109)
        at 
o.a.i.i.processors.cache.GridCacheIoManager$1.onMessage(GridCacheIoManager.java:308)
        at 
o.a.i.i.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1607)
        at 
o.a.i.i.managers.communication.GridIoManager.processRegularMessage0(GridIoManager.java:1231)
        at 
o.a.i.i.managers.communication.GridIoManager.access$4300(GridIoManager.java:132)
        at 
o.a.i.i.managers.communication.GridIoManager$8.run(GridIoManager.java:1124)
        at o.a.i.i.util.StripedExecutor$Stripe.body(StripedExecutor.java:559)
        at o.a.i.i.util.worker.GridWorker.run(GridWorker.java:119)
        at java.lang.Thread.run(Thread.java:748){code}



Reproduced by the PDS3 suite:
[https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=<default>|https://ggtc.gridgain.com/viewLog.html?buildId=2706284&buildTypeId=Tests_GridGainCeEeUe_Latest_CE_Pds3&tab=buildResultsDiv&branch_Tests_GridGainCeEeUe_Latest_CE=%3Cdefault%3E]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to