Abhi created KAFKA-7812:
---------------------------

             Summary: Deadlock in SaslServerAuthenticator related threads
                 Key: KAFKA-7812
                 URL: https://issues.apache.org/jira/browse/KAFKA-7812
             Project: Kafka
          Issue Type: Bug
          Components: core
    Affects Versions: 2.0.0
            Reporter: Abhi
         Attachments: threaddump-100cpu-tbd-broker-5

I am encountering a deadlock situation in SaslServerAuthenticator related code 
path where one thread is waiting for a monitor object locked by another thread.

+*Thread 1:*+

"kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-0" #66 
prio=5 os_prio=0 tid=0x00007fe131e17000 nid=0x78f7 runnable 
[{color:#d04437}*0x00007fde287ed000*{color}]
 java.lang.Thread.State: RUNNABLE
 at java.util.HashMap$TreeNode.find(HashMap.java:1865)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.find(HashMap.java:1861)
 at java.util.HashMap$TreeNode.putTreeVal(HashMap.java:1979)
 at java.util.HashMap.putVal(HashMap.java:637)
 at java.util.HashMap.put(HashMap.java:611)
 at java.util.HashSet.add(HashSet.java:219)
 at javax.security.auth.Subject$ClassSet.populateSet(Subject.java:1418)
 at javax.security.auth.Subject$ClassSet.<init>(Subject.java:1372)
 - *{color:#f79232}locked <0x000000068893aae8>{color}* (a 
java.util.Collections$SynchronizedSet)
 at javax.security.auth.Subject.getPrivateCredentials(Subject.java:767)
 at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:343)
 at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:335)
 at java.security.AccessController.doPrivileged(Native Method)
 at sun.security.jgss.GSSUtil.searchSubject(GSSUtil.java:335)
 at 
sun.security.jgss.wrapper.NativeGSSFactory.getCredFromSubject(NativeGSSFactory.java:53)
 at 
sun.security.jgss.wrapper.NativeGSSFactory.getCredentialElement(NativeGSSFactory.java:116)
 at 
sun.security.jgss.GSSManagerImpl.getCredentialElement(GSSManagerImpl.java:193)
 at sun.security.jgss.GSSCredentialImpl.add(GSSCredentialImpl.java:427)
 at sun.security.jgss.GSSCredentialImpl.<init>(GSSCredentialImpl.java:62)
 at sun.security.jgss.GSSManagerImpl.createCredential(GSSManagerImpl.java:154)
 at com.sun.security.sasl.gsskerb.GssKrb5Server.<init>(GssKrb5Server.java:108)
 at 
com.sun.security.sasl.gsskerb.FactoryImpl.createSaslServer(FactoryImpl.java:85)
 at javax.security.sasl.Sasl.createSaslServer(Sasl.java:524)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:215)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:213)
 at java.security.AccessController.doPrivileged(Native Method)
 at javax.security.auth.Subject.doAs(Subject.java:422)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:213)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253)
 at org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127)
 at 
org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487)
 at org.apache.kafka.common.network.Selector.poll(Selector.java:425)
 at kafka.network.Processor.poll(SocketServer.scala:678)
 at kafka.network.Processor.run(SocketServer.scala:583)
 at java.lang.Thread.run(Thread.java:745)

Locked ownable synchronizers:
 - None

*+Thread 2:+*

"kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-2" #68 
prio=5 os_prio=0 tid=0x00007fe131e1a800 nid=0x78f9 waiting for monitor entry 
[{color:#d04437}*0x00007fde277ed000*{color}]
 java.lang.Thread.State: BLOCKED (on object monitor)
 at java.util.Collections$SynchronizedCollection.add(Collections.java:2035)
 - {color:#f79232}*waiting to lock <0x000000068893aae8>*{color} (a 
java.util.Collections$SynchronizedSet)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:206)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253)
 at org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127)
 at 
org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487)
 at org.apache.kafka.common.network.Selector.poll(Selector.java:425)
 at kafka.network.Processor.poll(SocketServer.scala:678)
 at kafka.network.Processor.run(SocketServer.scala:583)
 at java.lang.Thread.run(Thread.java:745)

Locked ownable synchronizers:
 - None

 

+*Thread 3:*+

"kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-1" #67 
prio=5 os_prio=0 tid=0x00007fe131e18800 nid=0x78f8 waiting for monitor entry 
[{color:#59afe1}0x00007fde27fec000{color}]
 java.lang.Thread.State: BLOCKED (on object monitor)
 at javax.security.auth.Subject$ClassSet.<init>(Subject.java:1372)
 - *{color:#f79232}waiting to lock <0x000000068893aae8>{color}* (a 
java.util.Collections$SynchronizedSet)
 at javax.security.auth.Subject.getPrivateCredentials(Subject.java:767)
 at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:343)
 at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:335)
 at java.security.AccessController.doPrivileged(Native Method)
 at sun.security.jgss.GSSUtil.searchSubject(GSSUtil.java:335)
 at 
sun.security.jgss.wrapper.NativeGSSFactory.getCredFromSubject(NativeGSSFactory.java:53)
 at 
sun.security.jgss.wrapper.NativeGSSFactory.getCredentialElement(NativeGSSFactory.java:116)
 at 
sun.security.jgss.GSSManagerImpl.getCredentialElement(GSSManagerImpl.java:193)
 at sun.security.jgss.GSSCredentialImpl.add(GSSCredentialImpl.java:427)
 at sun.security.jgss.GSSCredentialImpl.<init>(GSSCredentialImpl.java:62)
 at sun.security.jgss.GSSManagerImpl.createCredential(GSSManagerImpl.java:154)
 at com.sun.security.sasl.gsskerb.GssKrb5Server.<init>(GssKrb5Server.java:108)
 at 
com.sun.security.sasl.gsskerb.FactoryImpl.createSaslServer(FactoryImpl.java:85)
 at javax.security.sasl.Sasl.createSaslServer(Sasl.java:524)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:215)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:213)
 at java.security.AccessController.doPrivileged(Native Method)
 at javax.security.auth.Subject.doAs(Subject.java:422)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:213)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443)
 at 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253)
 at org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127)
 at 
org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487)
 at org.apache.kafka.common.network.Selector.poll(Selector.java:425)
 at kafka.network.Processor.poll(SocketServer.scala:678)
 at kafka.network.Processor.run(SocketServer.scala:583)
 at java.lang.Thread.run(Thread.java:745)

Locked ownable synchronizers:
 - None

 

This deadlock is causing the kafka broker to run at 100% causing exceptions in 
other brokers and affecting replication and other functionalities.

+output of top:+

PID USER PR NI VIRT RES SHR S *%CPU* %MEM TIME+ COMMAND 
 30857 kafkatst 20 0 14.9g 3.9g 17600 S *100.0* 12.7 2727:53 java

The connections with other brokers are in stuck in CLOSE_WAIT state:

afka...@kafkatest-01.tbd[nj2]:/local/kafka> lsof -p 30857 | grep TCP
java 30857 kafkatst 88u IPv4 63730863 0t0 TCP *:34982 (LISTEN)
java 30857 kafkatst 89u IPv4 63730864 0t0 TCP *:mwkafka_staging_jmx (LISTEN)
java 30857 kafkatst 108u IPv4 67204682 0t0 TCP 
kafkatest-01.tbd.xxxx.com:44540->mwkafka-zk-test-02.nyc.xxxx.com:eforward 
(ESTABLISHED)
java 30857 kafkatst 288u IPv4 63728109 0t0 TCP *:XmlIpcRegSvc (LISTEN)
java 30857 kafkatst 298u IPv4 68617055 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.dr.xxxx.com:34318 
(ESTABLISHED)
java 30857 kafkatst 305u IPv4 63722102 0t0 TCP 
kafkatest-01.tbd.xxxx.com:41354->mwkafka-zk-test-05.tbd.xxxx.com:eforward 
(ESTABLISHED)
java 30857 kafkatst 308u IPv4 68616940 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.dr.xxxx.com:37050 
(CLOSE_WAIT)
java 30857 kafkatst 318u IPv4 63728144 0t0 TCP 
kafkatest-01.tbd.xxxx.com:m3ap->kafkatest-01.dr.xxxx.com:XmlIpcRegSvc 
(ESTABLISHED)
java 30857 kafkatst 319u IPv4 63722104 0t0 TCP 
kafkatest-01.tbd.xxxx.com:50960->kafkatest-01.nyc.xxxx.com:XmlIpcRegSvc 
(ESTABLISHED)
java 30857 kafkatst 320u IPv4 63722109 0t0 TCP 
kafkatest-01.tbd.xxxx.com:42892->kafkatest-02.nyc.xxxx.com:XmlIpcRegSvc 
(ESTABLISHED)
java 30857 kafkatst 321u IPv4 68616946 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.nyc.xxxx.com:54892 
(CLOSE_WAIT)
java 30857 kafkatst 322u IPv4 68617070 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.nyc.xxxx.com:47308 
(ESTABLISHED)
java 30857 kafkatst 323u IPv4 68617081 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.dr.xxxx.com:37060 
(ESTABLISHED)
java 30857 kafkatst 324u IPv4 68617095 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.nyc.xxxx.com:54904 
(ESTABLISHED)
java 30857 kafkatst 327u IPv4 68616885 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.dr.xxxx.com:34298 
(CLOSE_WAIT)
java 30857 kafkatst 328u IPv4 68616894 0t0 TCP 
kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.nyc.xxxx.com:47290 
(CLOSE_WAIT)

 

I have attached a full thread dump here for reference. Please check this.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to