[ https://issues.apache.org/jira/browse/IGNITE-23958?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Mikhail Petrov updated IGNITE-23958: ------------------------------------ Description: Reproducer: {code:java} /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite; import java.security.Permissions; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.apache.ignite.client.IgniteClient; import org.apache.ignite.client.IgniteClientFuture; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.configuration.ClientConfiguration; import org.apache.ignite.configuration.ClientConnectorConfiguration; import org.apache.ignite.configuration.DataRegionConfiguration; import org.apache.ignite.configuration.DataStorageConfiguration; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.configuration.ThinClientConfiguration; import org.apache.ignite.internal.IgniteEx; import org.apache.ignite.internal.IgniteInternalFuture; import org.apache.ignite.internal.events.DiscoveryCustomEvent; import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg; import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask; import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage; import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture; import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware; import org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage; import org.apache.ignite.internal.processors.security.impl.TestSecurityData; import org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider; import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.internal.visor.VisorTaskArgument; import org.apache.ignite.internal.visor.VisorTaskResult; import org.apache.ignite.plugin.security.SecurityPermissionSet; import org.apache.ignite.testframework.GridTestUtils; import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; import org.junit.Test; import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; import static org.apache.ignite.cluster.ClusterState.ACTIVE; import static org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT; import static org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE; import static org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ; import static org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER; import static org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create; import static org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions; /** */ public class TransactionsHangingTest extends GridCommonAbstractTest { /** {@inheritDoc} */ @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { IgniteConfiguration cfg = 
super.getConfiguration(igniteInstanceName); cfg.setDataStorageConfiguration(new DataStorageConfiguration() .setDefaultDataRegionConfiguration(new DataRegionConfiguration() .setPersistenceEnabled(true))); cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration() .setThreadPoolSize(2) // first thread is occupied by snapshot operation, the second is designated to perform cache operations. .setThinClientConfiguration(new ThinClientConfiguration() .setMaxActiveComputeTasksPerConnection(1))); cfg.setPluginProviders(new TestSecurityPluginProvider( igniteInstanceName, "", create() .defaultAllowAll(false) .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE) .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE) .build(), null, false, userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)), userData("client", create() .defaultAllowAll(false) .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, CACHE_PUT) .build()) )); return cfg; } /** {@inheritDoc} */ @Override protected void beforeTest() throws Exception { super.beforeTest(); cleanPersistenceDir(); } /** {@inheritDoc} */ @Override protected void afterTest() throws Exception { super.afterTest(); stopAllGrids(); cleanPersistenceDir(); } /** */ @Test public void test() throws Exception { IgniteEx ignite = startGrid(0); ignite.cluster().state(ACTIVE); ignite.createCache(new CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL)); CountDownLatch snapshotBlockedLatch = new CountDownLatch(1); CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1); ignite.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() { /** {@inheritDoc} */ @Override public void onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) { if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT) return; DiscoveryCustomMessage msg = ((DiscoveryCustomEvent)fut.firstEvent()).customMessage(); assertNotNull(msg); if (msg instanceof 
SnapshotDiscoveryMessage) { snapshotBlockedLatch.countDown(); try { snapshotUnblockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS); } catch (InterruptedException e) { throw new IgniteException(e); } } } }); try ( IgniteClient snpAdmin = Ignition.startClient(new ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword("")); IgniteClient cli = Ignition.startClient(new ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword("")) ) { SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg(); arg.snapshotName("test_snapshot"); arg.sync(true); IgniteClientFuture<VisorTaskResult<String>> snpFut = snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2( SnapshotCreateTask.class.getName(), new VisorTaskArgument<>( grid(0).localNode().id(), arg, false) ); assertTrue(snapshotBlockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS)); IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(0, 0)); IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(1, 1)); U.sleep(1000); // Wait for tx requests chaining. snapshotUnblockedLatch.countDown(); snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS); putFut0.get(getTestTimeout()); putFut1.get(getTestTimeout()); // Will hang. } } /** */ private TestSecurityData userData(String login, SecurityPermissionSet perms) { return new TestSecurityData( login, "", perms, new Permissions() ); } } {code} Code execution steps that lead to the hanging: 1. Start of the snapshot operation initiated by "administrator user account" causes PME to start. 2. Thin client sends tx-1, which is blocked until PME is completed. 3. Lets consider that thread-1 was used to handle tx-1 request. After https://issues.apache.org/jira/browse/IGNITE-21183 threads of thin client thread pool are not longer blocked until the end of transactions. 
Transaction requests are handled asynchronously. See suspend/resume of transactions. This allows thread-1 to handle another tx request. 4. Thin client sends tx-2 which is handled by the thread-1. 5. Since this thread did not complete the previous tx-1, tx-2 chains itself with tx-1 future and starts waiting for its completion. See org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856 6. PME caused by the snapshot finishes. tx-1 future is notified to proceed by the PME thread. The crucial thing here is that tx-1 proceeds its execution in PME thread that is associated with the user that started snapshot. By the end of tx-1 the tx-2 starts its execution. Also in thread that is associated with the user that started snapshot. 7. If snapshot administrator user was not granted permissions for cache operations - tx-2 fails. And future chain described in clause 5 becomes broken. It leads to the situation when all new transactions handled by thread-1 will chain themselves with the previous transaction executed by this thread. But they will never complete because the future chain is broken. We must fix exception handling during tx operations chaining and manually restore security context while executing transactional operation from the future listener. was: Reproducer: {code:java} /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite; import java.security.Permissions; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.apache.ignite.client.IgniteClient; import org.apache.ignite.client.IgniteClientFuture; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.configuration.ClientConfiguration; import org.apache.ignite.configuration.ClientConnectorConfiguration; import org.apache.ignite.configuration.DataRegionConfiguration; import org.apache.ignite.configuration.DataStorageConfiguration; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.configuration.ThinClientConfiguration; import org.apache.ignite.internal.IgniteEx; import org.apache.ignite.internal.IgniteInternalFuture; import org.apache.ignite.internal.events.DiscoveryCustomEvent; import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg; import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask; import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage; import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture; import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware; import org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage; import org.apache.ignite.internal.processors.security.impl.TestSecurityData; import org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider; import org.apache.ignite.internal.util.typedef.internal.U; import org.apache.ignite.internal.visor.VisorTaskArgument; import org.apache.ignite.internal.visor.VisorTaskResult; import org.apache.ignite.plugin.security.SecurityPermissionSet; import org.apache.ignite.testframework.GridTestUtils; import 
org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; import org.junit.Test; import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; import static org.apache.ignite.cluster.ClusterState.ACTIVE; import static org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT; import static org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE; import static org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT; import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ; import static org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER; import static org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create; import static org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions; /** */ public class TransactionsHangingTest extends GridCommonAbstractTest { /** {@inheritDoc} */ @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); cfg.setDataStorageConfiguration(new DataStorageConfiguration() .setDefaultDataRegionConfiguration(new DataRegionConfiguration() .setPersistenceEnabled(true))); cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration() .setThreadPoolSize(2) // first thread is occupied by snapshot operation, the second is designated to perform cache operations. 
.setThinClientConfiguration(new ThinClientConfiguration() .setMaxActiveComputeTasksPerConnection(1))); cfg.setPluginProviders(new TestSecurityPluginProvider( igniteInstanceName, "", create() .defaultAllowAll(false) .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE) .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE) .build(), null, false, userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)), userData("client", create() .defaultAllowAll(false) .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, CACHE_PUT) .build()) )); return cfg; } /** {@inheritDoc} */ @Override protected void beforeTest() throws Exception { super.beforeTest(); cleanPersistenceDir(); } /** {@inheritDoc} */ @Override protected void afterTest() throws Exception { super.afterTest(); stopAllGrids(); cleanPersistenceDir(); } /** */ @Test public void test() throws Exception { IgniteEx ignite = startGrid(0); ignite.cluster().state(ACTIVE); ignite.createCache(new CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL)); CountDownLatch snapshotBlockedLatch = new CountDownLatch(1); CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1); ignite.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() { /** {@inheritDoc} */ @Override public void onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) { if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT) return; DiscoveryCustomMessage msg = ((DiscoveryCustomEvent)fut.firstEvent()).customMessage(); assertNotNull(msg); if (msg instanceof SnapshotDiscoveryMessage) { snapshotBlockedLatch.countDown(); try { snapshotUnblockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS); } catch (InterruptedException e) { throw new IgniteException(e); } } } }); try ( IgniteClient snpAdmin = Ignition.startClient(new ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword("")); IgniteClient cli = Ignition.startClient(new 
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword("")) ) { SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg(); arg.snapshotName("test_snapshot"); arg.sync(true); IgniteClientFuture<VisorTaskResult<String>> snpFut = snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2( SnapshotCreateTask.class.getName(), new VisorTaskArgument<>( grid(0).localNode().id(), arg, false) ); assertTrue(snapshotBlockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS)); IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(0, 0)); IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(1, 1)); U.sleep(1000); // Wait for tx requests chaining. snapshotUnblockedLatch.countDown(); snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS); putFut0.get(getTestTimeout()); putFut1.get(getTestTimeout()); // Will hang. } } /** */ private TestSecurityData userData(String login, SecurityPermissionSet perms) { return new TestSecurityData( login, "", perms, new Permissions() ); } } {code} > Fix security context propagation for async transactional operations > ------------------------------------------------------------------- > > Key: IGNITE-23958 > URL: https://issues.apache.org/jira/browse/IGNITE-23958 > Project: Ignite > Issue Type: Bug > Reporter: Mikhail Petrov > Assignee: Mikhail Petrov > Priority: Blocker > Labels: ise > Fix For: 2.17 > > Time Spent: 40m > Remaining Estimate: 0h > > Reproducer: > {code:java} > /* > * Licensed to the Apache Software Foundation (ASF) under one or more > * contributor license agreements. See the NOTICE file distributed with > * this work for additional information regarding copyright ownership. > * The ASF licenses this file to You under the Apache License, Version 2.0 > * (the "License"); you may not use this file except in compliance with > * the License. 
You may obtain a copy of the License at > * > * http://www.apache.org/licenses/LICENSE-2.0 > * > * Unless required by applicable law or agreed to in writing, software > * distributed under the License is distributed on an "AS IS" BASIS, > * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > * See the License for the specific language governing permissions and > * limitations under the License. > */ > package org.apache.ignite; > import java.security.Permissions; > import java.util.concurrent.CountDownLatch; > import java.util.concurrent.TimeUnit; > import org.apache.ignite.client.IgniteClient; > import org.apache.ignite.client.IgniteClientFuture; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.ClientConfiguration; > import org.apache.ignite.configuration.ClientConnectorConfiguration; > import org.apache.ignite.configuration.DataRegionConfiguration; > import org.apache.ignite.configuration.DataStorageConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.configuration.ThinClientConfiguration; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.internal.IgniteInternalFuture; > import org.apache.ignite.internal.events.DiscoveryCustomEvent; > import > org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg; > import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask; > import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage; > import > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture; > import > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware; > import > org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage; > import org.apache.ignite.internal.processors.security.impl.TestSecurityData; > import > 
org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider; > import org.apache.ignite.internal.util.typedef.internal.U; > import org.apache.ignite.internal.visor.VisorTaskArgument; > import org.apache.ignite.internal.visor.VisorTaskResult; > import org.apache.ignite.plugin.security.SecurityPermissionSet; > import org.apache.ignite.testframework.GridTestUtils; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.junit.Test; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cluster.ClusterState.ACTIVE; > import static > org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT; > import static > org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE; > import static > org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT; > import static > org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE; > import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT; > import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ; > import static > org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER; > import static > org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create; > import static > org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions; > /** */ > public class TransactionsHangingTest extends GridCommonAbstractTest { > /** {@inheritDoc} */ > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); > cfg.setDataStorageConfiguration(new DataStorageConfiguration() > .setDefaultDataRegionConfiguration(new DataRegionConfiguration() > .setPersistenceEnabled(true))); > cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration() > .setThreadPoolSize(2) // first thread is occupied 
by snapshot > operation, the second is designated to perform cache operations. > .setThinClientConfiguration(new ThinClientConfiguration() > .setMaxActiveComputeTasksPerConnection(1))); > cfg.setPluginProviders(new TestSecurityPluginProvider( > igniteInstanceName, > "", > create() > .defaultAllowAll(false) > .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE) > .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE) > .build(), > null, > false, > userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)), > userData("client", create() > .defaultAllowAll(false) > .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, > CACHE_PUT) > .build()) > )); > return cfg; > } > /** {@inheritDoc} */ > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > cleanPersistenceDir(); > } > /** {@inheritDoc} */ > @Override protected void afterTest() throws Exception { > super.afterTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > /** */ > @Test > public void test() throws Exception { > IgniteEx ignite = startGrid(0); > ignite.cluster().state(ACTIVE); > ignite.createCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL)); > CountDownLatch snapshotBlockedLatch = new CountDownLatch(1); > CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1); > > ignite.context().cache().context().exchange().registerExchangeAwareComponent(new > PartitionsExchangeAware() { > /** {@inheritDoc} */ > @Override public void > onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) { > if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT) > return; > DiscoveryCustomMessage msg = > ((DiscoveryCustomEvent)fut.firstEvent()).customMessage(); > assertNotNull(msg); > if (msg instanceof SnapshotDiscoveryMessage) { > snapshotBlockedLatch.countDown(); > try { > snapshotUnblockedLatch.await(getTestTimeout(), > TimeUnit.MILLISECONDS); > } > catch (InterruptedException e) { > throw new IgniteException(e); > } > } > } > 
}); > try ( > IgniteClient snpAdmin = Ignition.startClient(new > ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword("")); > IgniteClient cli = Ignition.startClient(new > ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword("")) > ) { > SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg(); > arg.snapshotName("test_snapshot"); > arg.sync(true); > IgniteClientFuture<VisorTaskResult<String>> snpFut = > snpAdmin.compute().<VisorTaskArgument<?>, > VisorTaskResult<String>>executeAsync2( > SnapshotCreateTask.class.getName(), > new VisorTaskArgument<>( > grid(0).localNode().id(), > arg, > false) > ); > assertTrue(snapshotBlockedLatch.await(getTestTimeout(), > TimeUnit.MILLISECONDS)); > IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() > -> cli.cache(DEFAULT_CACHE_NAME).put(0, 0)); > IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() > -> cli.cache(DEFAULT_CACHE_NAME).put(1, 1)); > U.sleep(1000); // Wait for tx requests chaining. > snapshotUnblockedLatch.countDown(); > snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS); > putFut0.get(getTestTimeout()); > putFut1.get(getTestTimeout()); // Will hang. > } > } > /** */ > private TestSecurityData userData(String login, SecurityPermissionSet > perms) { > return new TestSecurityData( > login, > "", > perms, > new Permissions() > ); > } > } > {code} > Code execution steps that lead to the hanging: > 1. Start of the snapshot operation initiated by "administrator user account" > causes PME to start. > 2. Thin client sends tx-1, which is blocked until PME is completed. > 3. Lets consider that thread-1 was used to handle tx-1 request. After > https://issues.apache.org/jira/browse/IGNITE-21183 threads of thin client > thread pool are not longer blocked until the end of transactions. > Transactions requests are handled asynchronously. See suspend/resume of > transacitons. 
This allows thread-1 to handle another tx request. > 4. Thin client sends tx-2 which is handled by the thread-1. > 5. Since this thread did not complete the previous tx-1, tx-2 chains itself > with tx-1 future and starts waiting for its completion. See > org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856 > 6. PME caused by the snapshot finishes. tx-1 future is notified to proceed by > the PME thread. The crucial thing here is that tx-1 proceeds its execution in > PME thread that is associated with the user that started snapshot. By the end > of tx-1 the tx-2 starts its execution. Also in thread that is associated with > the user that started snapshot. > 7. If snapshot administrator user was not granted permissions for cache > operations - tx-2 fails. And future chain described in clause 5 becomes > broken. > It leads to the situation when all new transactions handled by thread-1 will > chain themselves with the previous transaction executed by this thread. But they will > never complete because the future chain is broken. > We must fix exception handling during tx operations chaining and manually > restore security context while executing transactional operation from the > future listener. -- This message was sent by Atlassian Jira (v8.20.10#820010)