Bob created HDFS-9479:
-------------------------
Summary: Deadlock between DFSOutputStream and LeaseRenewer when the network is unstable
Key: HDFS-9479
URL: https://issues.apache.org/jira/browse/HDFS-9479
Project: Hadoop HDFS
Issue Type: Bug
Components: hdfs-client
Affects Versions: 2.7.1
Reporter: Bob
Priority: Critical
{code} Java stack information for the threads listed above: =================================================== "Thread-1": at org.apache.hadoop.hdfs.client.impl.LeaseRenewer.addClient(LeaseRenewer.java:228) - waiting to lock <0x00000000d5c3c868> (a org.apache.hadoop.hdfs.client.impl.LeaseRenewer) at org.apache.hadoop.hdfs.client.impl.LeaseRenewer.getInstance(LeaseRenewer.java:85) at org.apache.hadoop.hdfs.DFSClient.getLeaseRenewer(DFSClient.java:480) at org.apache.hadoop.hdfs.DFSClient.endFileLease(DFSClient.java:491) at org.apache.hadoop.hdfs.DFSOutputStream.closeImpl(DFSOutputStream.java:803) - locked <0x00000000d5c1a860> (a org.apache.hadoop.hdfs.DFSOutputStream) at org.apache.hadoop.hdfs.DFSOutputStream.close(DFSOutputStream.java:765) - locked <0x00000000d5c1a860> (a org.apache.hadoop.hdfs.DFSOutputStream) at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) at org.apache.hadoop.mapreduce.jobhistory.EventWriter.close(EventWriter.java:80) at org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler$MetaInfo.closeWriter(JobHistoryEventHandler.java:1242) - locked <0x00000000d593aed0> (a java.lang.Object) at org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler.serviceStop(JobHistoryEventHandler.java:406) at org.apache.hadoop.service.AbstractService.stop(AbstractService.java:221) - locked <0x00000000d593ae88> (a java.lang.Object) at org.apache.hadoop.service.ServiceOperations.stop(ServiceOperations.java:52) at org.apache.hadoop.service.ServiceOperations.stopQuietly(ServiceOperations.java:80) at org.apache.hadoop.service.CompositeService.stop(CompositeService.java:157) at org.apache.hadoop.service.CompositeService.serviceStop(CompositeService.java:131) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.serviceStop(MRAppMaster.java:1677) at org.apache.hadoop.service.AbstractService.stop(AbstractService.java:221) - locked <0x00000000d55d9678> 
(a java.lang.Object) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.stop(MRAppMaster.java:1176) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$MRAppMasterShutdownHook.run(MRAppMaster.java:1524) at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54) "LeaseRenewer:hdfs@hacluster:8020": at org.apache.hadoop.hdfs.DFSOutputStream.abort(DFSOutputStream.java:720) - waiting to lock <0x00000000d5c1a860> (a org.apache.hadoop.hdfs.DFSOutputStream) at org.apache.hadoop.hdfs.DFSClient.closeAllFilesBeingWritten(DFSClient.java:598) at org.apache.hadoop.hdfs.client.impl.LeaseRenewer.run(LeaseRenewer.java:465) - locked <0x00000000d5c3c868> (a org.apache.hadoop.hdfs.client.impl.LeaseRenewer) at org.apache.hadoop.hdfs.client.impl.LeaseRenewer.access$700(LeaseRenewer.java:75) at org.apache.hadoop.hdfs.client.impl.LeaseRenewer$1.run(LeaseRenewer.java:311) at java.lang.Thread.run(Thread.java:745) {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)