vesence created HDFS-17427: ------------------------------ Summary: Thread hangs when using NativeIO for short-circuit read Key: HDFS-17427 URL: https://issues.apache.org/jira/browse/HDFS-17427 Project: Hadoop HDFS Issue Type: Bug Components: dfsclient Affects Versions: 3.2.3, 3.3.1 Environment: system os: centos 7
java version: 8 Reporter: vesence When remote HDFS jars are placed on the classpath of a custom URLClassLoader, the application is deployed on one of the datanodes, and short-circuit read is enabled, the application's main thread hangs. The stack trace is below: "main" #1 prio=5 os_prio=0 tid=0x00007f8344012000 nid=0x51da waiting on condition [0x00007f834dbea000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000c050b150> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:984) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:532) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:754) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:691) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:481) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:359) at org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:644) at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:575) - locked <0x00000000f23a0e78> (a org.apache.hadoop.hdfs.DFSInputStream) at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:757) - locked <0x00000000f23a0e78> (a org.apache.hadoop.hdfs.DFSInputStream) at 
org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:829) - locked <0x00000000f23a0e78> (a org.apache.hadoop.hdfs.DFSInputStream) at java.io.DataInputStream.read(DataInputStream.java:100) at java.nio.file.Files.copy(Files.java:2908) at java.nio.file.Files.copy(Files.java:3027) at sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) at sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) at java.security.AccessController.doPrivileged(Native Method) at sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) at sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) at sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) at sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) at sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) at sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) at sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) at sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) at sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) at java.security.AccessController.doPrivileged(Native Method) at sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) at sun.misc.URLClassPath$JarLoader.<init>(URLClassPath.java:829) at sun.misc.URLClassPath$3.run(URLClassPath.java:575) at sun.misc.URLClassPath$3.run(URLClassPath.java:565) at java.security.AccessController.doPrivileged(Native Method) at sun.misc.URLClassPath.getLoader(URLClassPath.java:564) at sun.misc.URLClassPath.getLoader(URLClassPath.java:529) - eliminated <0x00000000f0e87448> (a sun.misc.URLClassPath) at sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) - locked <0x00000000f0e87448> (a sun.misc.URLClassPath) at sun.misc.URLClassPath.findResource(URLClassPath.java:224) at java.net.URLClassLoader$2.run(URLClassLoader.java:572) at java.net.URLClassLoader$2.run(URLClassLoader.java:570) at 
java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findResource(URLClassLoader.java:569) at java.lang.ClassLoader.getResource(ClassLoader.java:1089) at java.lang.ClassLoader.getResource(ClassLoader.java:1084) at org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2780) at org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3036) at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2995) at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2968) at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2848) - locked <0x00000000f23977a8> (a org.apache.hadoop.conf.Configuration) at org.apache.hadoop.conf.Configuration.get(Configuration.java:1200) at org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1254) at org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1660) at org.apache.hadoop.io.nativeio.NativeIO$POSIX.<clinit>(NativeIO.java:186) at org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) at org.apache.hadoop.io.nativeio.NativeIO.<clinit>(NativeIO.java:683) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.<init>(ShortCircuitShm.java:469) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.<init>(DfsClientShm.java:70) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:251) at org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:984) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:532) at org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:754) at 
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:691) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:481) at org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:359) at org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:644) at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:575) - locked <0x00000000f20e98e0> (a org.apache.hadoop.hdfs.DFSInputStream) at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:757) - locked <0x00000000f20e98e0> (a org.apache.hadoop.hdfs.DFSInputStream) at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:829) - locked <0x00000000f20e98e0> (a org.apache.hadoop.hdfs.DFSInputStream) at java.io.DataInputStream.read(DataInputStream.java:100) at java.nio.file.Files.copy(Files.java:2908) at java.nio.file.Files.copy(Files.java:3027) at sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) at sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) at java.security.AccessController.doPrivileged(Native Method) at sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) at sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) at sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) at sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) at sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) at sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) at sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) at sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) at sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) at java.security.AccessController.doPrivileged(Native Method) at sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) at sun.misc.URLClassPath$JarLoader.<init>(URLClassPath.java:829) 
at sun.misc.URLClassPath$3.run(URLClassPath.java:575) at sun.misc.URLClassPath$3.run(URLClassPath.java:565) at java.security.AccessController.doPrivileged(Native Method) at sun.misc.URLClassPath.getLoader(URLClassPath.java:564) at sun.misc.URLClassPath.getLoader(URLClassPath.java:529) - eliminated <0x00000000f0e87448> (a sun.misc.URLClassPath) at sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) - locked <0x00000000f0e87448> (a sun.misc.URLClassPath) at sun.misc.URLClassPath.getResource(URLClassPath.java:248) at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:363) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:362) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) - locked <0x00000000f0e87078> (a org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1) at org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedClientLoader.scala:264) at org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.loadClass(IsolatedClientLoader.scala:253) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) at org.apache.spark.sql.hive.client.HiveClientImpl$.newHiveConf(HiveClientImpl.scala:1245) at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:164) at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:129) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:310) - locked <0x00000000f0e86db0> (a org.apache.spark.sql.hive.client.IsolatedClientLoader) at 
org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:496) at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:356) at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:71) - locked <0x00000000f0a487c8> (a org.apache.spark.sql.hive.HiveExternalCatalog) at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:70) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$databaseExists$1(HiveExternalCatalog.scala:224) at org.apache.spark.sql.hive.HiveExternalCatalog$$Lambda$1303/2102224373.apply$mcZ$sp(Unknown Source) at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:102) - locked <0x00000000f0a487c8> (a org.apache.spark.sql.hive.HiveExternalCatalog) at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:224) at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:150) - locked <0x00000000faef6648> (a org.apache.spark.sql.internal.SharedState) at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:140) at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:64) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.<init>(SparkSQLCLIDriver.scala:328) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:160) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at 
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-dev-h...@hadoop.apache.org