[ https://issues.apache.org/jira/browse/IGNITE-25194?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17947246#comment-17947246 ]
Ignite TC Bot commented on IGNITE-25194: ---------------------------------------- {panel:title=Branch: [pull/12020/head] Base: [master] : Possible Blockers (1)|borderStyle=dashed|borderColor=#ccc|titleBGColor=#F7D6C1} {color:#d04437}Platform .NET (Windows){color} [[tests 0 TIMEOUT , Exit Code |https://ci2.ignite.apache.org/viewLog.html?buildId=8410820]] {panel} {panel:title=Branch: [pull/12020/head] Base: [master] : New Tests (25)|borderStyle=dashed|borderColor=#ccc|titleBGColor=#D6F7C1} {color:#00008b}PDS (Compatibility){color} [[tests 24|https://ci2.ignite.apache.org/viewLog.html?buildId=8409986]] * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=false, customSnpPath=false, testCacheGrp=false] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=false, customSnpPath=false, testCacheGrp=true] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=false, customSnpPath=true, testCacheGrp=false] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=false, customSnpPath=true, testCacheGrp=true] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=3, cacheDump=false, customSnpPath=false, testCacheGrp=false] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: 
SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=3, cacheDump=false, customSnpPath=false, testCacheGrp=true] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=3, cacheDump=false, customSnpPath=true, testCacheGrp=false] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=true, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=3, cacheDump=false, customSnpPath=true, testCacheGrp=true] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=false, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=true, customSnpPath=false, testCacheGrp=false] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=false, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=true, customSnpPath=false, testCacheGrp=true] - PASSED{color} * {color:#013220}IgniteCompatibilityBasicTestSuite: SnapshotCompatibilityTest.testSnapshotRestore[incrementalSnp=false, consistentID=8d802f6e-bfc2-4024-a680-b5f8b6fea117, oldNodesCnt=1, cacheDump=true, customSnpPath=true, testCacheGrp=false] - PASSED{color} ... 
and 13 new tests {color:#00008b}PDS 8{color} [[tests 1|https://ci2.ignite.apache.org/viewLog.html?buildId=8409985]] * {color:#013220}IgnitePdsTestSuite8: PendingTreeCorruptionTest.testCorruptionOnExpiration - PASSED{color} {panel} [TeamCity *--> Run :: All* Results|https://ci2.ignite.apache.org/viewLog.html?buildId=8410035&buildTypeId=IgniteTests24Java8_RunAll] > Pending entries tree corruption when entry recreated with the same expire time > ------------------------------------------------------------------------------ > > Key: IGNITE-25194 > URL: https://issues.apache.org/jira/browse/IGNITE-25194 > Project: Ignite > Issue Type: Bug > Reporter: Aleksey Plekhanov > Assignee: Aleksey Plekhanov > Priority: Major > Labels: ise > Time Spent: 50m > Remaining Estimate: 0h > > Pending entries tree can be corrupted when the entry is recreated in the same > place with the same expire time. Consider the following situation: > # There is one expired entry in the cache and one corresponding pending tree > entry. > # Pending entry row is deleted by ttl-cleanup-worker and cache entry is > being prepared for removal. > # Concurrently another thread deletes the same key and puts the same key to the same > place with the same expire time (already expired, but this is > possible for CDC for example). This operation produces the new row in pending > tree. > # ttl-cleanup-worker continues to remove expired entry, removes cache data, > but skips pending entries cleanup, because expire times and links are equal > (see {{CacheDataStoreImpl#finishRemove}} method) > # Pending tree row refers to a non-existent link and next iteration over > pending tree leads to node failure due to corrupted tree exception. 
> We've faced this problem several times on production clusters (CDC > consumer) with the following exception: > {noformat} > Caused by: > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTreeRuntimeException: > java.lang.AssertionError: 284687612248073 > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.doInitFromLink(CacheDataRowAdapter.java:340) > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.initFromLink(CacheDataRowAdapter.java:160) > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.initFromLink(CacheDataRowAdapter.java:131) > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.initFromLink(CacheDataRowAdapter.java:118) > at > org.apache.ignite.internal.processors.cache.tree.PendingRow.initKey(PendingRow.java:72) > at > org.apache.ignite.internal.processors.cache.tree.PendingEntriesTree.getRow(PendingEntriesTree.java:140) > at > org.apache.ignite.internal.processors.cache.tree.PendingEntriesTree.getRow(PendingEntriesTree.java:32) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$RemoveRange.removeDataRowFromLeaf(BPlusTree.java:6174) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$RemoveFromLeaf.doRemoveOrLockTail(BPlusTree.java:674) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$RemoveRangeFromLeaf.run0(BPlusTree.java:722) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$RemoveRangeFromLeaf.run0(BPlusTree.java:686) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$GetPageHandler.run(BPlusTree.java:5830) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$GetPageHandler.run(BPlusTree.java:5816) > at > org.apache.ignite.internal.processors.cache.persistence.tree.util.PageHandler.writePage(PageHandler.java:381) > at > 
org.apache.ignite.internal.processors.cache.persistence.DataStructure.write(DataStructure.java:348) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.access$4900(BPlusTree.java:214) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Remove.doRemoveFromLeaf(BPlusTree.java:4730) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Remove.removeFromLeaf(BPlusTree.java:4709) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Remove.tryRemoveFromLeaf(BPlusTree.java:5161) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.removeDown(BPlusTree.java:2411) > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.doRemove(BPlusTree.java:2294) > ... 10 more > {noformat} > Reproducer: > {code:java} > @Test > @WithSystemProperty(key = > IgniteSystemProperties.IGNITE_UNWIND_THROTTLING_TIMEOUT, value = "1000") > public void testCorruptionOnExpiration() throws Exception { > IgniteEx srv = startGrid(); > CountDownLatch expirationStarted = new CountDownLatch(1); > CountDownLatch entryUpdated = new CountDownLatch(1); > IgniteCache<Object, Object> cache = srv.getOrCreateCache(new > CacheConfiguration<>(DEFAULT_CACHE_NAME) > .setAffinity(new > ExpirationWaitingRendezvousAffinityFunction(expirationStarted, > entryUpdated))); > // Warmup to ensure that the next put/remove/put will create row with > the same link. > cache.put(0, 0); > cache.remove(0); > IgniteInternalCache<Object, Object> cachex = > srv.cachex(DEFAULT_CACHE_NAME); > GridCacheVersion ver = new GridCacheVersion(1, 1, 1, 2); > KeyCacheObject key = new KeyCacheObjectImpl(0, null, -1); > CacheObjectImpl val = new CacheObjectImpl(0, null); > GridCacheDrInfo drInfo = new GridCacheDrExpirationInfo(val, ver, 1, > CU.toExpireTime(1000)); > Map<KeyCacheObject, GridCacheDrInfo> map = F.asMap(key, drInfo); > cachex.putAllConflict(map); > // Wait for PendingTree row removal. 
> assertTrue(expirationStarted.await(10, SECONDS)); > // Remove entry and put entry with the same key, with the same expire > time to the same place (with the same link). > cachex.removeAllConflict(F.asMap(key, ver)); > cachex.putAllConflict(map); > // Resume expiration thread. > entryUpdated.countDown(); > // Wait for entry removal by expiration. > assertTrue(GridTestUtils.waitForCondition(() -> > !cache.containsKey(0), 1_000L)); > // Check pending tree is in consistent state. > CacheGroupContext grp = cachex.context().group(); > PendingEntriesTree pendingTree = > grp.topology().localPartition(0).dataStore().pendingTree(); > int cacheId = CU.cacheId(DEFAULT_CACHE_NAME); > List<PendingRow> rows = pendingTree.remove(new PendingRow(cacheId, > Long.MIN_VALUE, 0), > new PendingRow(cacheId, U.currentTimeMillis(), 0), 1); > assertTrue(rows.isEmpty()); > } > /** */ > @SuppressWarnings("TransientFieldNotInitialized") > private static class ExpirationWaitingRendezvousAffinityFunction extends > RendezvousAffinityFunction { > /** */ > private final transient CountDownLatch expirationStarted; > /** */ > private final transient CountDownLatch entryUpdated; > /** */ > public ExpirationWaitingRendezvousAffinityFunction( > CountDownLatch expirationStarted, > CountDownLatch entryUpdated > ) { > this.expirationStarted = expirationStarted; > this.entryUpdated = entryUpdated; > } > /** {@inheritDoc} */ > @Override public int partition(Object key) { > if > (Thread.currentThread().getName().contains("ttl-cleanup-worker")) { > expirationStarted.countDown(); > // Suspend ttl-cleanup-worker after PendingTree row is > removed, but before the corresponding > // expired row is deleted from cache data tree and row store. > U.awaitQuiet(entryUpdated); > } > return super.partition(key); > } > } > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)