[ https://issues.apache.org/jira/browse/HDDS-1821?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shashikant Banerjee resolved HDDS-1821. --------------------------------------- Resolution: Duplicate Assignee: Shashikant Banerjee > BlockOutputStream#watchForCommit fails with UnsupportedOperationException > when one DN is down > --------------------------------------------------------------------------------------------- > > Key: HDDS-1821 > URL: https://issues.apache.org/jira/browse/HDDS-1821 > Project: Hadoop Distributed Data Store > Issue Type: Bug > Components: Ozone Client > Reporter: Nanda kumar > Assignee: Shashikant Banerjee > Priority: Major > Labels: blockade > > When one of the datanodes in the Ratis pipeline is excluded by introducing a > network failure, the client write fails with the following exception > {noformat} > 2019-07-18 07:13:33 WARN XceiverClientRatis:262 - 3 way commit failed on > pipeline Pipeline[ Id: b338512c-1a3b-4ae6-b89c-7b7737d9bd93, Nodes: > ce90cf89-0444-45bf-8c49-a126d8da5a5f{ip: 192.168.240.4, host: > ozoneblockade_datanode_2.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}fa65a457-155d-4bf3-8d1b-b0e11ec157ae{ip: > 192.168.240.6, host: ozoneblockade_datanode_3.ozoneblockade_default, > networkLocation: /default-rack, certSerialId: > null}c5785c99-7dc2-4afc-9054-2efa28a41e7e{ip: 192.168.240.2, host: > ozoneblockade_datanode_1.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}, Type:RATIS, Factor:THREE, State:OPEN] > E java.util.concurrent.ExecutionException: > org.apache.ratis.protocol.NotReplicatedException: Request with call Id 2 and > log index 9 is not yet replicated to ALL_COMMITTED > E at > java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:395) > E at > java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2022) > E at > org.apache.hadoop.hdds.scm.XceiverClientRatis.watchForCommit(XceiverClientRatis.java:259) > E at > org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchForCommit(CommitWatcher.java:194) > E at 
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchOnLastIndex(CommitWatcher.java:157) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.watchForCommit(BlockOutputStream.java:348) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlush(BlockOutputStream.java:480) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.close(BlockOutputStream.java:494) > E at > org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.close(BlockOutputStreamEntry.java:143) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:434) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.close(KeyOutputStream.java:472) > E at > org.apache.hadoop.ozone.client.io.OzoneOutputStream.close(OzoneOutputStream.java:60) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.createKey(RandomKeyGenerator.java:706) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.access$1100(RandomKeyGenerator.java:88) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator$ObjectCreator.run(RandomKeyGenerator.java:609) > E at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) > E at > java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > E at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > E at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > E at java.base/java.lang.Thread.run(Thread.java:834) > E Caused by: org.apache.ratis.protocol.NotReplicatedException: > Request with call Id 2 and log index 9 is not yet replicated to ALL_COMMITTED > E at > org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientReply(ClientProtoUtils.java:245) > E at > org.apache.ratis.grpc.client.GrpcClientProtocolClient$AsyncStreamObservers$1.onNext(GrpcClientProtocolClient.java:254) > E at > 
org.apache.ratis.grpc.client.GrpcClientProtocolClient$AsyncStreamObservers$1.onNext(GrpcClientProtocolClient.java:249) > E at > org.apache.ratis.thirdparty.io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onMessage(ClientCalls.java:421) > E at > org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onMessage(ForwardingClientCallListener.java:33) > E at > org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onMessage(ForwardingClientCallListener.java:33) > E at > org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1MessagesAvailable.runInContext(ClientCallImpl.java:519) > E at > org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) > E at > org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) > E ... 3 more > E 2019-07-18 07:13:33 INFO XceiverClientRatis:280 - Could not commit > index 9 on pipeline Pipeline[ Id: b338512c-1a3b-4ae6-b89c-7b7737d9bd93, > Nodes: ce90cf89-0444-45bf-8c49-a126d8da5a5f{ip: 192.168.240.4, host: > ozoneblockade_datanode_2.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}fa65a457-155d-4bf3-8d1b-b0e11ec157ae{ip: > 192.168.240.6, host: ozoneblockade_datanode_3.ozoneblockade_default, > networkLocation: /default-rack, certSerialId: > null}c5785c99-7dc2-4afc-9054-2efa28a41e7e{ip: 192.168.240.2, host: > ozoneblockade_datanode_1.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}, Type:RATIS, Factor:THREE, State:OPEN] to > all the nodes. Server fa65a457-155d-4bf3-8d1b-b0e11ec157ae has failed. > Committed by majority. 
> E 2019-07-18 07:13:33 WARN BlockOutputStream:354 - Failed to commit > BlockId conID: 1 locID: 102461208643108865 bcsId: 9 on Pipeline[ Id: > b338512c-1a3b-4ae6-b89c-7b7737d9bd93, Nodes: > ce90cf89-0444-45bf-8c49-a126d8da5a5f{ip: 192.168.240.4, host: > ozoneblockade_datanode_2.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}fa65a457-155d-4bf3-8d1b-b0e11ec157ae{ip: > 192.168.240.6, host: ozoneblockade_datanode_3.ozoneblockade_default, > networkLocation: /default-rack, certSerialId: > null}c5785c99-7dc2-4afc-9054-2efa28a41e7e{ip: 192.168.240.2, host: > ozoneblockade_datanode_1.ozoneblockade_default, networkLocation: > /default-rack, certSerialId: null}, Type:RATIS, Factor:THREE, State:OPEN]. > Failed nodes: [fa65a457-155d-4bf3-8d1b-b0e11ec157ae{ip: null, host: null, > networkLocation: /default-rack, certSerialId: null}] > E 2019-07-18 07:13:33 ERROR RandomKeyGenerator:730 - Exception while > adding key: key-0-06904 in bucket: bucket-0-14179 of volume: vol-0-21379. 
> E java.lang.UnsupportedOperationException > E at java.base/java.util.AbstractList.add(AbstractList.java:153) > E at java.base/java.util.AbstractList.add(AbstractList.java:111) > E at > java.base/java.util.AbstractCollection.addAll(AbstractCollection.java:352) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.watchForCommit(BlockOutputStream.java:356) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlush(BlockOutputStream.java:480) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.close(BlockOutputStream.java:494) > E at > org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.close(BlockOutputStreamEntry.java:143) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:434) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.close(KeyOutputStream.java:472) > E at > org.apache.hadoop.ozone.client.io.OzoneOutputStream.close(OzoneOutputStream.java:60) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.createKey(RandomKeyGenerator.java:706) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.access$1100(RandomKeyGenerator.java:88) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator$ObjectCreator.run(RandomKeyGenerator.java:609) > E at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) > E at > java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > E at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > E at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > E at java.base/java.lang.Thread.run(Thread.java:834) > E java.lang.UnsupportedOperationException > E at java.base/java.util.AbstractList.add(AbstractList.java:153) > E at java.base/java.util.AbstractList.add(AbstractList.java:111) > E at > java.base/java.util.AbstractCollection.addAll(AbstractCollection.java:352) > E at > 
org.apache.hadoop.hdds.scm.storage.BlockOutputStream.watchForCommit(BlockOutputStream.java:356) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlush(BlockOutputStream.java:480) > E at > org.apache.hadoop.hdds.scm.storage.BlockOutputStream.close(BlockOutputStream.java:494) > E at > org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.close(BlockOutputStreamEntry.java:143) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:434) > E at > org.apache.hadoop.ozone.client.io.KeyOutputStream.close(KeyOutputStream.java:472) > E at > org.apache.hadoop.ozone.client.io.OzoneOutputStream.close(OzoneOutputStream.java:60) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.createKey(RandomKeyGenerator.java:706) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator.access$1100(RandomKeyGenerator.java:88) > E at > org.apache.hadoop.ozone.freon.RandomKeyGenerator$ObjectCreator.run(RandomKeyGenerator.java:609) > E at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) > E at > java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > E at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > E at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > E at java.base/java.lang.Thread.run(Thread.java:834) > E > {noformat} -- This message was sent by Atlassian JIRA (v7.6.14#76016) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-dev-h...@hadoop.apache.org