[ 
https://issues.apache.org/jira/browse/IGNITE-23958?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Mikhail Petrov updated IGNITE-23958:
------------------------------------
    Description: 
Reproducer:

{code:java}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite;

import java.security.Permissions;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.ignite.client.IgniteClient;
import org.apache.ignite.client.IgniteClientFuture;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.ClientConfiguration;
import org.apache.ignite.configuration.ClientConnectorConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.ThinClientConfiguration;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.events.DiscoveryCustomEvent;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
import 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
import 
org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.visor.VisorTaskArgument;
import org.apache.ignite.internal.visor.VisorTaskResult;
import org.apache.ignite.plugin.security.SecurityPermissionSet;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;

import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cluster.ClusterState.ACTIVE;
import static 
org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
import static 
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
import static 
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
import static 
org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
import static 
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
import static 
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;

/** */
public class TransactionsHangingTest extends GridCommonAbstractTest {
    /** {@inheritDoc} */
    @Override protected IgniteConfiguration getConfiguration(String 
igniteInstanceName) throws Exception {
        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);

        cfg.setDataStorageConfiguration(new DataStorageConfiguration()
            .setDefaultDataRegionConfiguration(new DataRegionConfiguration()
                .setPersistenceEnabled(true)));

        cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
            .setThreadPoolSize(2) // first thread is occupied by snapshot 
operation, the second is designated to perform cache operations.
            .setThinClientConfiguration(new ThinClientConfiguration()
                .setMaxActiveComputeTasksPerConnection(1)));

        cfg.setPluginProviders(new TestSecurityPluginProvider(
            igniteInstanceName,
            "",
            create()
                .defaultAllowAll(false)
                .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
                .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
                .build(),
            null,
            false,
            userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)),
            userData("client", create()
                .defaultAllowAll(false)
                .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, 
CACHE_PUT)
                .build())
        ));

        return cfg;
    }

    /** {@inheritDoc} */
    @Override protected void beforeTest() throws Exception {
        super.beforeTest();

        cleanPersistenceDir();
    }

    /** {@inheritDoc} */
    @Override protected void afterTest() throws Exception {
        super.afterTest();

        stopAllGrids();

        cleanPersistenceDir();
    }

    /** */
    @Test
    public void test() throws Exception {
        IgniteEx ignite = startGrid(0);

        ignite.cluster().state(ACTIVE);

        ignite.createCache(new 
CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));

        CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
        CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);

        
ignite.context().cache().context().exchange().registerExchangeAwareComponent(new
 PartitionsExchangeAware() {
            /** {@inheritDoc} */
            @Override public void 
onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
                if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
                    return;

                DiscoveryCustomMessage msg = 
((DiscoveryCustomEvent)fut.firstEvent()).customMessage();

                assertNotNull(msg);

                if (msg instanceof SnapshotDiscoveryMessage) {
                    snapshotBlockedLatch.countDown();

                    try {
                        snapshotUnblockedLatch.await(getTestTimeout(), 
TimeUnit.MILLISECONDS);
                    }
                    catch (InterruptedException e) {
                        throw new IgniteException(e);
                    }
                }
            }
        });

        try (
            IgniteClient snpAdmin = Ignition.startClient(new 
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword(""));
            IgniteClient cli = Ignition.startClient(new 
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword(""))
        ) {
            SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();

            arg.snapshotName("test_snapshot");
            arg.sync(true);

            IgniteClientFuture<VisorTaskResult<String>> snpFut = 
snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2(
                SnapshotCreateTask.class.getName(),
                new VisorTaskArgument<>(
                    grid(0).localNode().id(),
                    arg,
                    false)
            );

            assertTrue(snapshotBlockedLatch.await(getTestTimeout(), 
TimeUnit.MILLISECONDS));

            IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() -> 
cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
            IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() -> 
cli.cache(DEFAULT_CACHE_NAME).put(1, 1));

            U.sleep(1000); // Wait for tx requests chaining.

            snapshotUnblockedLatch.countDown();

            snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
            putFut0.get(getTestTimeout());
            putFut1.get(getTestTimeout()); // Will hang.
        }
    }

    /** */
    private TestSecurityData userData(String login, SecurityPermissionSet 
perms) {
        return new TestSecurityData(
            login,
            "",
            perms,
            new Permissions()
        );
    }
}
{code}

Code execution steps that lead to the hanging:

1. Start of the snapshot operation initiated by "administrator user account" 
causes PME to start.
2. Thin client sends tx-1, which is blocked until PME is completed.
3. Let's assume that thread-1 was used to handle the tx-1 request. After 
https://issues.apache.org/jira/browse/IGNITE-21183 threads of the thin client 
thread pool are no longer blocked until the end of transactions. Transaction 
requests are handled asynchronously. See suspend/resume of transactions. This 
allows thread-1 to handle another tx request.
4. Thin client sends tx-2 which is handled by the thread-1.
5. Since this thread has not completed the previous tx-1 yet, tx-2 chains itself 
to the tx-1 future and waits for its completion. See 
org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856
6. PME caused by the snapshot finishes. tx-1 future is notified to proceed by 
the PME thread. The crucial thing here is that tx-1 proceeds its execution in 
PME thread that is associated with the user that started snapshot. By the end 
of tx-1 the tx-2 starts its execution. Also in thread that is associated with 
the user that started snapshot.
7. If the snapshot administrator user was not granted permissions for cache 
operations, tx-2 fails, and the future chain described in clause 5 becomes broken.

This leads to a situation where every new transaction handled by thread-1 
chains itself to the previous transaction executed by this thread. But they will 
never complete because the future chain is broken.

We must fix exceptions handling during tx operations chaining and manually 
restore security context while executing transactional operation from the 
future listener.  


  was:
Reproducer:

{code:java}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ignite;

import java.security.Permissions;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.ignite.client.IgniteClient;
import org.apache.ignite.client.IgniteClientFuture;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.ClientConfiguration;
import org.apache.ignite.configuration.ClientConnectorConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.ThinClientConfiguration;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.events.DiscoveryCustomEvent;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
import 
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
import 
org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.visor.VisorTaskArgument;
import org.apache.ignite.internal.visor.VisorTaskResult;
import org.apache.ignite.plugin.security.SecurityPermissionSet;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;

import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cluster.ClusterState.ACTIVE;
import static 
org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
import static 
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
import static 
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
import static 
org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
import static 
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
import static 
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;

/** */
public class TransactionsHangingTest extends GridCommonAbstractTest {
    /** {@inheritDoc} */
    @Override protected IgniteConfiguration getConfiguration(String 
igniteInstanceName) throws Exception {
        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);

        cfg.setDataStorageConfiguration(new DataStorageConfiguration()
            .setDefaultDataRegionConfiguration(new DataRegionConfiguration()
                .setPersistenceEnabled(true)));

        cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
            .setThreadPoolSize(2) // first thread is occupied by snapshot 
operation, the second is designated to perform cache operations.
            .setThinClientConfiguration(new ThinClientConfiguration()
                .setMaxActiveComputeTasksPerConnection(1)));

        cfg.setPluginProviders(new TestSecurityPluginProvider(
            igniteInstanceName,
            "",
            create()
                .defaultAllowAll(false)
                .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
                .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
                .build(),
            null,
            false,
            userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)),
            userData("client", create()
                .defaultAllowAll(false)
                .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, 
CACHE_PUT)
                .build())
        ));

        return cfg;
    }

    /** {@inheritDoc} */
    @Override protected void beforeTest() throws Exception {
        super.beforeTest();

        cleanPersistenceDir();
    }

    /** {@inheritDoc} */
    @Override protected void afterTest() throws Exception {
        super.afterTest();

        stopAllGrids();

        cleanPersistenceDir();
    }

    /** */
    @Test
    public void test() throws Exception {
        IgniteEx ignite = startGrid(0);

        ignite.cluster().state(ACTIVE);

        ignite.createCache(new 
CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));

        CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
        CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);

        
ignite.context().cache().context().exchange().registerExchangeAwareComponent(new
 PartitionsExchangeAware() {
            /** {@inheritDoc} */
            @Override public void 
onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
                if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
                    return;

                DiscoveryCustomMessage msg = 
((DiscoveryCustomEvent)fut.firstEvent()).customMessage();

                assertNotNull(msg);

                if (msg instanceof SnapshotDiscoveryMessage) {
                    snapshotBlockedLatch.countDown();

                    try {
                        snapshotUnblockedLatch.await(getTestTimeout(), 
TimeUnit.MILLISECONDS);
                    }
                    catch (InterruptedException e) {
                        throw new IgniteException(e);
                    }
                }
            }
        });

        try (
            IgniteClient snpAdmin = Ignition.startClient(new 
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword(""));
            IgniteClient cli = Ignition.startClient(new 
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword(""))
        ) {
            SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();

            arg.snapshotName("test_snapshot");
            arg.sync(true);

            IgniteClientFuture<VisorTaskResult<String>> snpFut = 
snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2(
                SnapshotCreateTask.class.getName(),
                new VisorTaskArgument<>(
                    grid(0).localNode().id(),
                    arg,
                    false)
            );

            assertTrue(snapshotBlockedLatch.await(getTestTimeout(), 
TimeUnit.MILLISECONDS));

            IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() -> 
cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
            IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() -> 
cli.cache(DEFAULT_CACHE_NAME).put(1, 1));

            U.sleep(1000); // Wait for tx requests chaining.

            snapshotUnblockedLatch.countDown();

            snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
            putFut0.get(getTestTimeout());
            putFut1.get(getTestTimeout()); // Will hang.
        }
    }

    /** */
    private TestSecurityData userData(String login, SecurityPermissionSet 
perms) {
        return new TestSecurityData(
            login,
            "",
            perms,
            new Permissions()
        );
    }
}
{code}



> Fix security context propagation for async transactional operations
> -------------------------------------------------------------------
>
>                 Key: IGNITE-23958
>                 URL: https://issues.apache.org/jira/browse/IGNITE-23958
>             Project: Ignite
>          Issue Type: Bug
>            Reporter: Mikhail Petrov
>            Assignee: Mikhail Petrov
>            Priority: Blocker
>              Labels: ise
>             Fix For: 2.17
>
>          Time Spent: 40m
>  Remaining Estimate: 0h
>
> Reproducer:
> {code:java}
> /*
>  * Licensed to the Apache Software Foundation (ASF) under one or more
>  * contributor license agreements.  See the NOTICE file distributed with
>  * this work for additional information regarding copyright ownership.
>  * The ASF licenses this file to You under the Apache License, Version 2.0
>  * (the "License"); you may not use this file except in compliance with
>  * the License.  You may obtain a copy of the License at
>  *
>  *      http://www.apache.org/licenses/LICENSE-2.0
>  *
>  * Unless required by applicable law or agreed to in writing, software
>  * distributed under the License is distributed on an "AS IS" BASIS,
>  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>  * See the License for the specific language governing permissions and
>  * limitations under the License.
>  */
> package org.apache.ignite;
> import java.security.Permissions;
> import java.util.concurrent.CountDownLatch;
> import java.util.concurrent.TimeUnit;
> import org.apache.ignite.client.IgniteClient;
> import org.apache.ignite.client.IgniteClientFuture;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.ClientConfiguration;
> import org.apache.ignite.configuration.ClientConnectorConfiguration;
> import org.apache.ignite.configuration.DataRegionConfiguration;
> import org.apache.ignite.configuration.DataStorageConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.configuration.ThinClientConfiguration;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.internal.IgniteInternalFuture;
> import org.apache.ignite.internal.events.DiscoveryCustomEvent;
> import 
> org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
> import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
> import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
> import 
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
> import 
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
> import 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
> import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
> import 
> org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
> import org.apache.ignite.internal.util.typedef.internal.U;
> import org.apache.ignite.internal.visor.VisorTaskArgument;
> import org.apache.ignite.internal.visor.VisorTaskResult;
> import org.apache.ignite.plugin.security.SecurityPermissionSet;
> import org.apache.ignite.testframework.GridTestUtils;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.junit.Test;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cluster.ClusterState.ACTIVE;
> import static 
> org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
> import static 
> org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
> import static 
> org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
> import static 
> org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
> import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
> import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
> import static 
> org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
> import static 
> org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
> import static 
> org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;
> /** Reproducer for IGNITE-23958: a thin client put hangs after a PME caused by a snapshot started by another user. */
> public class TransactionsHangingTest extends GridCommonAbstractTest {
>     /** {@inheritDoc} */
>     @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
>         IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
>         cfg.setDataStorageConfiguration(new DataStorageConfiguration()
>             .setDefaultDataRegionConfiguration(new DataRegionConfiguration()
>                 .setPersistenceEnabled(true)));
>         cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
>             .setThreadPoolSize(2) // first thread is occupied by snapshot 
> operation, the second is designated to perform cache operations.
>             .setThinClientConfiguration(new ThinClientConfiguration()
>                 .setMaxActiveComputeTasksPerConnection(1)));
>         cfg.setPluginProviders(new TestSecurityPluginProvider(
>             igniteInstanceName,
>             "",
>             create()
>                 .defaultAllowAll(false)
>                 .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
>                 .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
>                 .build(),
>             null,
>             false,
>             userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)),
>             userData("client", create()
>                 .defaultAllowAll(false)
>                 .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, 
> CACHE_PUT)
>                 .build())
>         ));
>         return cfg;
>     }
>     /** {@inheritDoc} */
>     @Override protected void beforeTest() throws Exception {
>         super.beforeTest();
>         cleanPersistenceDir();
>     }
>     /** {@inheritDoc} */
>     @Override protected void afterTest() throws Exception {
>         super.afterTest();
>         stopAllGrids();
>         cleanPersistenceDir();
>     }
>     /** Blocks the snapshot exchange, sends two thin client puts, releases the exchange and waits for all futures. */
>     @Test
>     public void test() throws Exception {
>         IgniteEx ignite = startGrid(0);
>         ignite.cluster().state(ACTIVE);
>         ignite.createCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));
>         CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
>         CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);
>         
> ignite.context().cache().context().exchange().registerExchangeAwareComponent(new
>  PartitionsExchangeAware() {
>             /** {@inheritDoc} */
>             @Override public void 
> onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
>                 if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
>                     return;
>                 DiscoveryCustomMessage msg = 
> ((DiscoveryCustomEvent)fut.firstEvent()).customMessage();
>                 assertNotNull(msg);
>                 if (msg instanceof SnapshotDiscoveryMessage) {
>                     snapshotBlockedLatch.countDown();
>                     try {
>                         snapshotUnblockedLatch.await(getTestTimeout(), 
> TimeUnit.MILLISECONDS);
>                     }
>                     catch (InterruptedException e) {
>                         throw new IgniteException(e);
>                     }
>                 }
>             }
>         });
>         try (
>             IgniteClient snpAdmin = Ignition.startClient(new 
> ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword(""));
>             IgniteClient cli = Ignition.startClient(new 
> ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword(""))
>         ) {
>             SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();
>             arg.snapshotName("test_snapshot");
>             arg.sync(true);
>             IgniteClientFuture<VisorTaskResult<String>> snpFut = 
> snpAdmin.compute().<VisorTaskArgument<?>, 
> VisorTaskResult<String>>executeAsync2(
>                 SnapshotCreateTask.class.getName(),
>                 new VisorTaskArgument<>(
>                     grid(0).localNode().id(),
>                     arg,
>                     false)
>             );
>             assertTrue(snapshotBlockedLatch.await(getTestTimeout(), 
> TimeUnit.MILLISECONDS));
>             IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() 
> -> cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
>             IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() 
> -> cli.cache(DEFAULT_CACHE_NAME).put(1, 1));
>             U.sleep(1000); // Give both put requests time to chain before the exchange is released.
>             snapshotUnblockedLatch.countDown();
>             snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
>             putFut0.get(getTestTimeout());
>             putFut1.get(getTestTimeout()); // Will hang: this is the failure the reproducer demonstrates.
>         }
>     }
>     /** Creates a test account for the given login and permission set; the "" argument is presumably the password. */
>     private TestSecurityData userData(String login, SecurityPermissionSet 
> perms) {
>         return new TestSecurityData(
>             login,
>             "",
>             perms,
>             new Permissions()
>         );
>     }
> }
> {code}
> Code execution steps that lead to the hanging:
> 1. Start of the snapshot operation initiated by "administrator user account" 
> causes PME to start.
> 2. Thin client sends tx-1, which is blocked until PME is completed.
> 3. Let's assume that thread-1 was used to handle the tx-1 request. After 
> https://issues.apache.org/jira/browse/IGNITE-21183 threads of the thin client 
> thread pool are no longer blocked until the end of transactions. 
> Transaction requests are handled asynchronously. See suspend/resume of 
> transactions. This allows thread-1 to handle another tx request.
> 4. Thin client sends tx-2 which is handled by the thread-1.
> 5. Since this thread has not completed the previous tx-1 yet, tx-2 chains itself 
> to the tx-1 future and waits for its completion. See 
> org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856
> 6. PME caused by the snapshot finishes. tx-1 future is notified to proceed by 
> the PME thread. The crucial thing here is that tx-1 proceeds its execution in 
> PME thread that is associated with the user that started snapshot. By the end 
> of tx-1 the tx-2 starts its execution. Also in thread that is associated with 
> the user that started snapshot.
> 7. If the snapshot administrator user was not granted permissions for cache 
> operations, tx-2 fails, and the future chain described in clause 5 becomes 
> broken.
> This leads to a situation where every new transaction handled by thread-1 
> chains itself to the previous transaction executed by this thread. But they will 
> never complete because the future chain is broken.
> We must fix exceptions handling during tx operations chaining and manually 
> restore security context while executing transactional operation from the 
> future listener.  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to