[
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sergey Antonov updated IGNITE-12774:
------------------------------------
Fix Version/s: 2.9
> Transaction hangs after too many open files NIO exception
> ---------------------------------------------------------
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
> Issue Type: Bug
> Reporter: Sergey Antonov
> Assignee: Sergey Antonov
> Priority: Major
> Fix For: 2.9
>
> Time Spent: 10m
> Remaining Estimate: 0h
>
> Transaction hung after “Open too many files” error and never been finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi =
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx =
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC,
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache<Object, Object> cache =
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hungs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg)
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure<IgniteException> ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg = "Failed to connect to node (is node
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a
> timeout set " +
> "in order to prevent parties from waiting forever in case of
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to remote
> node: " + node.id(), new IgniteCheckedException(checkedExceptionMsg, new
> SocketException("Too many open files")));
> }
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)