[ https://issues.apache.org/jira/browse/HIVE-13730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15279098#comment-15279098 ]
Wei Zheng commented on HIVE-13730: ---------------------------------- It's stuck in an infinite while loop in BytesBytesMultiHashMap.findKeySlotToWrite(). {code} $ jps 90673 TezChild 90976 TezChild 90855 TezChild 91225 Jps 82923 RemoteMavenServer 90205 surefirebooter3625226115924096543.jar 90191 Launcher 90542 DAGAppMaster $ jstack 90673 2016-05-10 15:13:47 Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.74-b02 mixed mode): "Attach Listener" #138 daemon prio=9 os_prio=31 tid=0x00007feea4800000 nid=0x3d3b waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "TezTaskEventRouter{attempt_1462916018098_0001_32_01_000000_0}" #134 daemon prio=5 os_prio=31 tid=0x00007feea684f000 nid=0x692f waiting on condition [0x0000700001be7000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000007bc9d6490> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask$1.runInternal(LogicalIOProcessorRuntimeTask.java:773) at org.apache.tez.common.RunnableWithNdc.run(RunnableWithNdc.java:35) at java.lang.Thread.run(Thread.java:745) "org.apache.hadoop.hdfs.PeerCache@35f41fc9" #22 daemon prio=5 os_prio=31 tid=0x00007feea686d800 nid=0x6a03 waiting on condition [0x0000700001cea000] java.lang.Thread.State: TIMED_WAITING (sleeping) at java.lang.Thread.sleep(Native Method) at org.apache.hadoop.hdfs.PeerCache.run(PeerCache.java:244) at org.apache.hadoop.hdfs.PeerCache.access$000(PeerCache.java:41) at org.apache.hadoop.hdfs.PeerCache$1.run(PeerCache.java:119) at java.lang.Thread.run(Thread.java:745) "TaskHeartbeatThread" #15 daemon prio=5 os_prio=31 tid=0x00007feea310c000 nid=0x6403 
waiting on condition [0x00007000019e1000] java.lang.Thread.State: TIMED_WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000007bcb6aa40> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2163) at org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:200) at org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:128) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "IPC Parameter Sending Thread #0" #14 daemon prio=5 os_prio=31 tid=0x00007feea0979000 nid=0x6203 waiting on condition [0x00007000018de000] java.lang.Thread.State: TIMED_WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x000000078df78428> (a java.util.concurrent.SynchronousQueue$TransferStack) at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) at java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460) at java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362) at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941) at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1066) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "IPC Client (1617838096) connection to /10.22.27.129:64289 from application_1462916018098_0001" #13 daemon prio=5 os_prio=31 tid=0x00007feea11f6800 nid=0x6003 in 
Object.wait() [0x00007000017db000] java.lang.Thread.State: TIMED_WAITING (on object monitor) at java.lang.Object.wait(Native Method) at org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:920) - locked <0x000000078df52318> (a org.apache.hadoop.ipc.Client$Connection) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:965) "TezChild" #12 daemon prio=5 os_prio=31 tid=0x00007feea0a65000 nid=0x5e07 runnable [0x00007000016d7000] java.lang.Thread.State: RUNNABLE at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.findKeySlotToWrite(BytesBytesMultiHashMap.java:602) at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.put(BytesBytesMultiHashMap.java:454) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:646) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:591) at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:528) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:641) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:655) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:413) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:186) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:160) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:355) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:72) at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:60) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628) at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:60) at 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:36) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "AsyncLogger-1" #11 daemon prio=5 os_prio=31 tid=0x00007feea1235000 nid=0x5a0f waiting on condition [0x00007000015d5000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x000000078e0657c8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) at com.lmax.disruptor.BlockingWaitStrategy.waitFor(BlockingWaitStrategy.java:45) at com.lmax.disruptor.ProcessingSequenceBarrier.waitFor(ProcessingSequenceBarrier.java:55) at com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:123) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) "Service Thread" #9 daemon prio=9 os_prio=31 tid=0x00007feea4801000 nid=0x5203 runnable [0x0000000000000000] java.lang.Thread.State: RUNNABLE "C1 CompilerThread3" #8 daemon prio=9 os_prio=31 tid=0x00007feea3004800 nid=0x5003 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "C2 CompilerThread2" #7 daemon prio=9 os_prio=31 tid=0x00007feea102c800 nid=0x4e03 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "C2 CompilerThread1" #6 daemon prio=9 os_prio=31 tid=0x00007feea1803800 nid=0x4c03 waiting on condition [0x0000000000000000] java.lang.Thread.State: 
RUNNABLE "C2 CompilerThread0" #5 daemon prio=9 os_prio=31 tid=0x00007feea1801000 nid=0x4a03 waiting on condition [0x0000000000000000] java.lang.Thread.State: RUNNABLE "Signal Dispatcher" #4 daemon prio=9 os_prio=31 tid=0x00007feea081c800 nid=0x3e0f runnable [0x0000000000000000] java.lang.Thread.State: RUNNABLE "Finalizer" #3 daemon prio=8 os_prio=31 tid=0x00007feea080f800 nid=0x3803 in Object.wait() [0x0000700000d3a000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143) - locked <0x000000078e1a8a90> (a java.lang.ref.ReferenceQueue$Lock) at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164) at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209) "Reference Handler" #2 daemon prio=10 os_prio=31 tid=0x00007feea3845000 nid=0x3603 in Object.wait() [0x0000700000c37000] java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(Native Method) at java.lang.Object.wait(Object.java:502) at java.lang.ref.Reference.tryHandlePending(Reference.java:191) - locked <0x000000078e1a8b28> (a java.lang.ref.Reference$Lock) at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:153) "main" #1 prio=5 os_prio=31 tid=0x00007feea2802000 nid=0x1703 waiting on condition [0x0000700000219000] java.lang.Thread.State: WAITING (parking) at sun.misc.Unsafe.park(Native Method) - parking to wait for <0x00000007bcb6b0d8> (a com.google.common.util.concurrent.ListenableFutureTask) at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:429) at java.util.concurrent.FutureTask.get(FutureTask.java:191) at org.apache.tez.runtime.task.TezTaskRunner2.run(TezTaskRunner2.java:158) at org.apache.tez.runtime.task.TezChild.run(TezChild.java:264) at org.apache.tez.runtime.task.TezChild.main(TezChild.java:508) "VM Thread" os_prio=31 tid=0x00007feea102c000 nid=0x3403 runnable "GC task thread#0 
(ParallelGC)" os_prio=31 tid=0x00007feea101d000 nid=0x2403 runnable
"GC task thread#1 (ParallelGC)" os_prio=31 tid=0x00007feea080a800 nid=0x2603 runnable
"GC task thread#2 (ParallelGC)" os_prio=31 tid=0x00007feea3000000 nid=0x2803 runnable
"GC task thread#3 (ParallelGC)" os_prio=31 tid=0x00007feea0804000 nid=0x2a03 runnable
"GC task thread#4 (ParallelGC)" os_prio=31 tid=0x00007feea080d000 nid=0x2c03 runnable
"GC task thread#5 (ParallelGC)" os_prio=31 tid=0x00007feea080d800 nid=0x2e03 runnable
"GC task thread#6 (ParallelGC)" os_prio=31 tid=0x00007feea080e800 nid=0x3003 runnable
"GC task thread#7 (ParallelGC)" os_prio=31 tid=0x00007feea080f000 nid=0x3203 runnable
"VM Periodic Task Thread" os_prio=31 tid=0x00007feea481c800 nid=0x5403 waiting on condition
JNI global references: 273
{code}

> hybridgrace_hashjoin_1.q test gets stuck
> ----------------------------------------
>
> Key: HIVE-13730
> URL: https://issues.apache.org/jira/browse/HIVE-13730
> Project: Hive
> Issue Type: Bug
> Components: Tez
> Affects Versions: 2.1.0
> Reporter: Vikram Dixit K
> Assignee: Wei Zheng
> Priority: Blocker
>
> I am seeing hybridgrace_hashjoin_1.q getting stuck on master.

--
This message was sent by Atlassian JIRA (v6.3.4#6332)