James Blackburn created KAFKA-1458: -------------------------------------- Summary: kafka hanging on shutdown Key: KAFKA-1458 URL: https://issues.apache.org/jira/browse/KAFKA-1458 Project: Kafka Issue Type: Bug Components: core Affects Versions: 0.8.1 Reporter: James Blackburn
I tried to restart the kafka broker because of KAFKA-1407. However a normal kill wouldn't kill it: jstack shows: {code} Full thread dump Java HotSpot(TM) 64-Bit Server VM (24.0-b56 mixed mode): "Attach Listener" daemon prio=10 tid=0x000000001c2e8800 nid=0x6174 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "SIGTERM handler" daemon prio=10 tid=0x000000001c377800 nid=0x6076 waiting for monitor entry [0x00000000431f2000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "SIGTERM handler" daemon prio=10 tid=0x000000001c652800 nid=0x6069 waiting for monitor entry [0x00000000430f1000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "SIGTERM handler" daemon prio=10 tid=0x000000001c204000 nid=0x6068 waiting for monitor entry [0x0000000044303000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "SIGTERM handler" daemon prio=10 tid=0x000000001c319000 nid=0x605b waiting for monitor entry [0x00000000409f3000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "SIGTERM handler" daemon prio=10 tid=0x000000001c625000 nid=0x604c waiting for monitor entry [0x00000000439fa000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "SIGTERM handler" daemon prio=10 tid=0x000000001c2e9800 nid=0x5d8b waiting for monitor entry [0x00000000438f9000] java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(Shutdown.java:212) - waiting to lock <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "Thread-2" prio=10 tid=0x000000001c31a000 nid=0x3d4f waiting on condition [0x0000000044707000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d04f28b8> (a java.util.concurrent.locks.ReentrantLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1197) at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:214) at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:290) at kafka.utils.Utils$.inLock(Utils.scala:536) at kafka.controller.KafkaController.shutdown(KafkaController.scala:641) at kafka.server.KafkaServer$$anonfun$shutdown$8.apply$mcV$sp(KafkaServer.scala:233) at kafka.utils.Utils$.swallow(Utils.scala:167) at kafka.utils.Logging$class.swallowWarn(Logging.scala:92) at kafka.utils.Utils$.swallowWarn(Utils.scala:46) at kafka.utils.Logging$class.swallow(Logging.scala:94) at kafka.utils.Utils$.swallow(Utils.scala:46) at kafka.server.KafkaServer.shutdown(KafkaServer.scala:233) at kafka.server.KafkaServerStartable.shutdown(KafkaServerStartable.scala:46) at kafka.Kafka$$anon$1.run(Kafka.scala:42) "SIGTERM handler" daemon prio=10 tid=0x000000001c5ac800 nid=0x3d4d in Object.wait() [0x0000000040d5a000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.Thread.join(Thread.java:1260) - locked <0x00000000d0410f90> (a kafka.Kafka$$anon$1) at java.lang.Thread.join(Thread.java:1334) at java.lang.ApplicationShutdownHooks.runHooks(ApplicationShutdownHooks.java:106) at java.lang.ApplicationShutdownHooks$1.run(ApplicationShutdownHooks.java:46) at java.lang.Shutdown.runHooks(Shutdown.java:123) at java.lang.Shutdown.sequence(Shutdown.java:167) at java.lang.Shutdown.exit(Shutdown.java:212) - locked <0x00000000d042f068> (a java.lang.Class for java.lang.Shutdown) at java.lang.Terminator$1.handle(Terminator.java:52) at sun.misc.Signal$1.run(Signal.java:212) at java.lang.Thread.run(Thread.java:724) "main-EventThread" daemon prio=10 tid=0x000000001c939800 nid=0x3504 waiting on condition [0x0000000044606000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d69e0f48> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:503) "main-SendThread(reggie-svc-257:2181)" daemon prio=10 tid=0x000000001c5ca000 nid=0x3503 runnable [0x0000000041030000] java.lang.Thread.State: RUNNABLE at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method) at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:257) at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:79) at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87) - locked <0x00000000fa134dc0> (a sun.nio.ch.Util$2) - locked <0x00000000fa134dd8> (a java.util.Collections$UnmodifiableSet) - locked <0x00000000e2348030> (a sun.nio.ch.EPollSelectorImpl) at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98) at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:1134) "Controller-1-to-broker-0-send-thread" prio=10 tid=0x00002aaab0731800 nid=0x4535 waiting on condition [0x000000004031f000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d13d3878> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at kafka.controller.RequestSendThread.doWork(ControllerChannelManager.scala:121) at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:51) "delete-topics-thread" prio=10 tid=0x00002aaab10f8800 nid=0x4e2a waiting on condition [0x0000000044505000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d04f28b8> (a java.util.concurrent.locks.ReentrantLock$NonfairSync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2047) at kafka.controller.TopicDeletionManager.kafka$controller$TopicDeletionManager$$awaitTopicDeletionNotification(TopicDeletionManager.scala:178) at kafka.controller.TopicDeletionManager$DeleteTopicsThread$$anonfun$doWork$1.apply$mcV$sp(TopicDeletionManager.scala:334) at kafka.controller.TopicDeletionManager$DeleteTopicsThread$$anonfun$doWork$1.apply(TopicDeletionManager.scala:333) at kafka.controller.TopicDeletionManager$DeleteTopicsThread$$anonfun$doWork$1.apply(TopicDeletionManager.scala:333) at kafka.utils.Utils$.inLock(Utils.scala:538) at kafka.controller.TopicDeletionManager$DeleteTopicsThread.doWork(TopicDeletionManager.scala:333) at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:51) "Controller-1-to-broker-1-send-thread" prio=10 tid=0x000000001c714800 nid=0x4e1d waiting on condition [0x0000000044404000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d06f96f8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at kafka.controller.RequestSendThread.doWork(ControllerChannelManager.scala:121) at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:51) "metrics-meter-tick-thread-2" daemon prio=10 tid=0x000000001c600000 nid=0x2add waiting on condition [0x00000000437f8000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d0363bf0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043) at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1085) at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:807) at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1068) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:724) "metrics-meter-tick-thread-1" daemon prio=10 tid=0x000000001c5ce000 nid=0x2adb waiting on condition [0x00000000436f7000] java.lang.Thread.State: TIMED_WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d0363bf0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:226) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2082) at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1090) at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:807) at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1068) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:724) "ZkClient-EventThread-13-reggie-svc-257:2181,reggie-svc-258:2181" daemon prio=10 tid=0x00002aaab04e1800 nid=0x2a2b waiting on condition [0x0000000040c59000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d03658e8> (a java.util.concurrent.CountDownLatch$Sync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:994) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1303) at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:236) at kafka.utils.ShutdownableThread.shutdown(ShutdownableThread.scala:36) at kafka.controller.TopicDeletionManager.shutdown(TopicDeletionManager.scala:93) at kafka.controller.KafkaController$$anonfun$onControllerResignation$1.apply$mcV$sp(KafkaController.scala:340) at kafka.controller.KafkaController$$anonfun$onControllerResignation$1.apply(KafkaController.scala:337) at kafka.controller.KafkaController$$anonfun$onControllerResignation$1.apply(KafkaController.scala:337) at kafka.utils.Utils$.inLock(Utils.scala:538) at kafka.controller.KafkaController.onControllerResignation(KafkaController.scala:337) at kafka.controller.KafkaController$SessionExpirationListener$$anonfun$handleNewSession$1.apply$mcZ$sp(KafkaController.scala:1068) at kafka.controller.KafkaController$SessionExpirationListener$$anonfun$handleNewSession$1.apply(KafkaController.scala:1067) at kafka.controller.KafkaController$SessionExpirationListener$$anonfun$handleNewSession$1.apply(KafkaController.scala:1067) at kafka.utils.Utils$.inLock(Utils.scala:538) at kafka.controller.KafkaController$SessionExpirationListener.handleNewSession(KafkaController.scala:1067) at org.I0Itec.zkclient.ZkClient$4.run(ZkClient.java:472) at org.I0Itec.zkclient.ZkEventThread.run(ZkEventThread.java:71) "RMI TCP Accept-0" daemon prio=10 tid=0x00002aaab0316800 nid=0x298f runnable [0x0000000040731000] java.lang.Thread.State: RUNNABLE at java.net.PlainSocketImpl.socketAccept(Native Method) at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:398) at java.net.ServerSocket.implAccept(ServerSocket.java:530) at java.net.ServerSocket.accept(ServerSocket.java:498) at sun.management.jmxremote.LocalRMIServerSocketFactory$1.accept(LocalRMIServerSocketFactory.java:52) at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(TCPTransport.java:388) at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(TCPTransport.java:360) at java.lang.Thread.run(Thread.java:724) "Service Thread" daemon prio=10 tid=0x00002aaab0242000 nid=0x2979 runnable [0x0000000000000000] java.lang.Thread.State: RUNNABLE "C2 CompilerThread1" daemon prio=10 tid=0x00002aaab023f800 nid=0x2978 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "C2 CompilerThread0" daemon prio=10 tid=0x00002aaab023d800 nid=0x2977 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "Signal Dispatcher" daemon prio=10 tid=0x00002aaab023b000 nid=0x2976 runnable [0x0000000000000000] java.lang.Thread.State: RUNNABLE "Surrogate Locker Thread (Concurrent GC)" daemon prio=10 tid=0x00002aaab0239000 nid=0x2974 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "Finalizer" daemon prio=10 tid=0x00002aaab021b000 nid=0x296e in Object.wait() [0x0000000041a6d000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:135) - locked <0x00000000d0351828> (a java.lang.ref.ReferenceQueue$Lock) at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:151) at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:189) "Reference Handler" daemon prio=10 tid=0x00002aaab0217000 nid=0x296d in Object.wait() [0x0000000040b58000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.Object.wait(Object.java:503) at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:133) - locked <0x00000000d0356c10> (a java.lang.ref.Reference$Lock) "main" prio=10 tid=0x000000001c152800 nid=0x2945 waiting on condition [0x000000004021e000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000000d0355760> (a java.util.concurrent.CountDownLatch$Sync) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:994) at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1303) at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:236) at kafka.server.KafkaServer.awaitShutdown(KafkaServer.scala:246) at kafka.server.KafkaServerStartable.awaitShutdown(KafkaServerStartable.scala:56) at kafka.Kafka$.main(Kafka.scala:47) at kafka.Kafka.main(Kafka.scala) "VM Thread" prio=10 tid=0x00002aaab0214800 nid=0x296b runnable "Gang worker#0 (Parallel GC Threads)" prio=10 tid=0x000000001c164000 nid=0x2946 runnable "Gang worker#1 (Parallel GC Threads)" prio=10 tid=0x000000001c165800 nid=0x2947 runnable "Gang worker#2 (Parallel GC Threads)" prio=10 tid=0x00002aaab0001000 nid=0x2949 runnable "Gang worker#3 (Parallel GC Threads)" prio=10 tid=0x00002aaab0003000 nid=0x294b runnable "Gang worker#4 (Parallel GC Threads)" prio=10 tid=0x00002aaab0004800 nid=0x294c runnable "Gang worker#5 (Parallel GC Threads)" prio=10 tid=0x00002aaab0006800 nid=0x294e runnable "Gang worker#6 (Parallel GC Threads)" prio=10 tid=0x00002aaab0008800 nid=0x294f runnable "Gang worker#7 (Parallel GC Threads)" prio=10 tid=0x00002aaab000a000 nid=0x2950 runnable "Gang worker#8 (Parallel GC Threads)" prio=10 tid=0x00002aaab000c000 nid=0x2952 runnable "Gang worker#9 (Parallel GC Threads)" prio=10 tid=0x00002aaab000e000 nid=0x2954 runnable "Gang worker#10 (Parallel GC Threads)" prio=10 tid=0x00002aaab0010000 nid=0x2955 runnable "Gang worker#11 (Parallel GC Threads)" prio=10 tid=0x00002aaab0011800 nid=0x2956 runnable "Gang worker#12 (Parallel GC Threads)" prio=10 tid=0x00002aaab0013800 nid=0x2957 runnable "Gang worker#13 (Parallel GC Threads)" prio=10 tid=0x00002aaab0015800 nid=0x2959 runnable "Gang worker#14 (Parallel GC Threads)" prio=10 tid=0x00002aaab0017000 nid=0x295b runnable "Gang worker#15 (Parallel GC Threads)" prio=10 tid=0x00002aaab0019000 nid=0x295d runnable "Gang worker#16 (Parallel GC Threads)" prio=10 tid=0x00002aaab001b000 nid=0x295e runnable "Gang worker#17 (Parallel GC Threads)" prio=10 tid=0x00002aaab001d000 nid=0x2961 runnable "Concurrent Mark-Sweep GC Thread" prio=10 tid=0x00002aaab01d0000 nid=0x296a runnable "Gang worker#0 (Parallel CMS Threads)" prio=10 tid=0x00002aaab01c6000 nid=0x2962 runnable "Gang worker#1 (Parallel CMS Threads)" prio=10 tid=0x00002aaab01c8000 nid=0x2963 runnable "Gang worker#2 (Parallel CMS Threads)" prio=10 tid=0x00002aaab01c9800 nid=0x2964 runnable "Gang worker#3 (Parallel CMS Threads)" prio=10 tid=0x00002aaab01cb800 nid=0x2967 runnable "Gang worker#4 (Parallel CMS Threads)" prio=10 tid=0x00002aaab01cd800 nid=0x2968 runnable "VM Periodic Task Thread" prio=10 tid=0x00002aaab0326800 nid=0x2997 waiting on condition JNI global references: 162 {code} -- This message was sent by Atlassian JIRA (v6.2#6252)