[ https://issues.apache.org/jira/browse/HIVE-28617?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
László Bodor updated HIVE-28617: -------------------------------- Description: in case of an IOW sometimes the MoveTask takes as much as the original DAG, like below (mind the 1000s difference between the RunDAG and the overall query time): {code} INFO : Run DAG 2125.73s ... VERTICES: 04/04 [==========================>>] 100% ELAPSED TIME: 3193.96 s {code} I can see this thread in HS2 after the end of DAG: {code} "HiveServer2-Background-Pool: Thread-33224" #33224 prio=5 os_prio=0 cpu=1956.78ms elapsed=3190.58s tid=0x00007f2b92e4e800 nid=0x1a6d9 waiting on condition [0x00007f2b7c840000] java.lang.Thread.State: WAITING (parking) at jdk.internal.misc.Unsafe.park(java.base@11.0.24/Native Method) - parking to wait for <0x00000006015666f0> (a java.util.concurrent.FutureTask) at java.util.concurrent.locks.LockSupport.park(java.base@11.0.24/LockSupport.java:194) at java.util.concurrent.FutureTask.awaitDone(java.base@11.0.24/FutureTask.java:447) at java.util.concurrent.FutureTask.get(java.base@11.0.24/FutureTask.java:190) at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:5214) at org.apache.hadoop.hive.ql.exec.MoveTask.moveFileInDfs(MoveTask.java:237) at org.apache.hadoop.hive.ql.exec.MoveTask.moveFile(MoveTask.java:192) at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:438) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:356) at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:329) at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:107) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:546) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:540) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190) at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:235) at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:92) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:340) at java.security.AccessController.doPrivileged(java.base@11.0.24/Native Method) at javax.security.auth.Subject.doAs(java.base@11.0.24/Subject.java:423) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:360) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) at java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) at java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.24/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.24/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.24/Thread.java:829) {code} in the HS2 log it's displayed like: {code} hiveserver2 <15>1 2024-11-07T16:09:03.534Z hiveserver2-0 hiveserver2 1 7e9f692b-2bff-487f-92a6-a41312e15850 [mdc@38374 class="log.PerfLogger" dagId="dag_1730992536558_0006_1" level="DEBUG" operationLogLevel="EXECUTION" queryId="hive_20241107151538_c450a4aa-0e0f-445a-a080-45fb12b84e93" sessionId="9c37f653-3e22-4276-9189-cfec9382abc5" thread="HiveServer2-Background-Pool: Thread-33224"] </PERFLOG method=FileMoves start=1730995520789 end=1730995743534 duration=222745 from=MoveTask> {code} I think the summary should be extended to: {code} INFO : Query Execution Summary INFO : ---------------------------------------------------------------------------------------------- INFO : OPERATION DURATION INFO : ---------------------------------------------------------------------------------------------- INFO : Compile Query 0.47s INFO : Prepare Plan 0.15s INFO : Get Query Coordinator (AM) 0.07s INFO : Submit Plan 0.01s INFO : Start DAG 0.01s INFO : Run DAG 1616.78s INFO : Move Files 1000s INFO : ---------------------------------------------------------------------------------------------- {code} was: in case of an IOW sometimes the MoveTask takes as much as the original DAG, like below (mind the 1000s difference between the RunDAG and the overall query time): {code} INFO : Run DAG 2125.73s ... VERTICES: 04/04 [==========================>>] 100% ELAPSED TIME: 3193.96 s {code} I can see this thread in HS2 after the end of DAG: {code} "HiveServer2-Background-Pool: Thread-33224" #33224 prio=5 os_prio=0 cpu=1956.78ms elapsed=3190.58s tid=0x00007f2b92e4e800 nid=0x1a6d9 waiting on condition [0x00007f2b7c840000] java.lang.Thread.State: WAITING (parking) at jdk.internal.misc.Unsafe.park(java.base@11.0.24/Native Method) - parking to wait for <0x00000006015666f0> (a java.util.concurrent.FutureTask) at java.util.concurrent.locks.LockSupport.park(java.base@11.0.24/LockSupport.java:194) at java.util.concurrent.FutureTask.awaitDone(java.base@11.0.24/FutureTask.java:447) at java.util.concurrent.FutureTask.get(java.base@11.0.24/FutureTask.java:190) at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:5214) at org.apache.hadoop.hive.ql.exec.MoveTask.moveFileInDfs(MoveTask.java:237) at org.apache.hadoop.hive.ql.exec.MoveTask.moveFile(MoveTask.java:192) at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:438) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:356) at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:329) at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:107) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:546) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:540) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190) at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:235) at org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:92) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:340) at java.security.AccessController.doPrivileged(java.base@11.0.24/Native Method) at javax.security.auth.Subject.doAs(java.base@11.0.24/Subject.java:423) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) at org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:360) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) at java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) at java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) at java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) at java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.24/ThreadPoolExecutor.java:1128) at java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.24/ThreadPoolExecutor.java:628) at java.lang.Thread.run(java.base@11.0.24/Thread.java:829) {code} in the HS2 log it's displayed like: {code} hiveserver2 <15>1 2024-11-07T16:09:03.534Z hiveserver2-0 hiveserver2 1 7e9f692b-2bff-487f-92a6-a41312e15850 [mdc@38374 class="log.PerfLogger" dagId="dag_1730992536558_0006_1" level="DEBUG" operationLogLevel="EXECUTION" queryId="hive_20241107151538_c450a4aa-0e0f-445a-a080-45fb12b84e93" sessionId="9c37f653-3e22-4276-9189-cfec9382abc5" thread="HiveServer2-Background-Pool: Thread-33224"] </PERFLOG method=FileMoves start=1730995520789 end=1730995743534 duration=222745 from=MoveTask> {code} I think the summary should be extended to: {code} INFO : Query Execution Summary INFO : ---------------------------------------------------------------------------------------------- INFO : OPERATION DURATION INFO : ---------------------------------------------------------------------------------------------- INFO : Compile Query 0.47s INFO : Prepare Plan 0.15s INFO : Get Query Coordinator (AM) 0.07s INFO : Submit Plan 0.01s INFO : Start DAG 0.01s INFO : Run DAG 1616.78s INFO : Moving Files 1000s INFO : ---------------------------------------------------------------------------------------------- {code} > Display MoveTask's duration on the query summary > ------------------------------------------------ > > Key: HIVE-28617 > URL: https://issues.apache.org/jira/browse/HIVE-28617 > Project: Hive > Issue Type: Bug > Security Level: Public(Viewable by anyone) > Reporter: László Bodor > Assignee: László Bodor > Priority: Major > > in case of an IOW sometimes the MoveTask takes as much as the original DAG, > like below (mind the 1000s difference between the RunDAG and the overall > query time): > {code} > INFO : Run DAG 2125.73s > ... > VERTICES: 04/04 [==========================>>] 100% ELAPSED TIME: 3193.96 s > {code} > I can see this thread in HS2 after the end of DAG: > {code} > "HiveServer2-Background-Pool: Thread-33224" #33224 prio=5 os_prio=0 > cpu=1956.78ms elapsed=3190.58s tid=0x00007f2b92e4e800 nid=0x1a6d9 waiting on > condition [0x00007f2b7c840000] > java.lang.Thread.State: WAITING (parking) > at jdk.internal.misc.Unsafe.park(java.base@11.0.24/Native Method) > - parking to wait for <0x00000006015666f0> (a > java.util.concurrent.FutureTask) > at > java.util.concurrent.locks.LockSupport.park(java.base@11.0.24/LockSupport.java:194) > at > java.util.concurrent.FutureTask.awaitDone(java.base@11.0.24/FutureTask.java:447) > at > java.util.concurrent.FutureTask.get(java.base@11.0.24/FutureTask.java:190) > at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:5214) > at > org.apache.hadoop.hive.ql.exec.MoveTask.moveFileInDfs(MoveTask.java:237) > at org.apache.hadoop.hive.ql.exec.MoveTask.moveFile(MoveTask.java:192) > at org.apache.hadoop.hive.ql.exec.MoveTask.execute(MoveTask.java:438) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) > at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:356) > at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:329) > at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) > at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:107) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:809) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:546) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:540) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190) > at > org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:235) > at > org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:92) > at > org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:340) > at java.security.AccessController.doPrivileged(java.base@11.0.24/Native > Method) > at javax.security.auth.Subject.doAs(java.base@11.0.24/Subject.java:423) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899) > at > org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:360) > at > java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) > at > java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) > at > java.util.concurrent.Executors$RunnableAdapter.call(java.base@11.0.24/Executors.java:515) > at > java.util.concurrent.FutureTask.run(java.base@11.0.24/FutureTask.java:264) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(java.base@11.0.24/ThreadPoolExecutor.java:1128) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(java.base@11.0.24/ThreadPoolExecutor.java:628) > at java.lang.Thread.run(java.base@11.0.24/Thread.java:829) > {code} > in the HS2 log it's displayed like: > {code} > hiveserver2 <15>1 2024-11-07T16:09:03.534Z hiveserver2-0 hiveserver2 1 > 7e9f692b-2bff-487f-92a6-a41312e15850 [mdc@38374 class="log.PerfLogger" > dagId="dag_1730992536558_0006_1" level="DEBUG" operationLogLevel="EXECUTION" > queryId="hive_20241107151538_c450a4aa-0e0f-445a-a080-45fb12b84e93" > sessionId="9c37f653-3e22-4276-9189-cfec9382abc5" > thread="HiveServer2-Background-Pool: Thread-33224"] </PERFLOG > method=FileMoves start=1730995520789 end=1730995743534 duration=222745 > from=MoveTask> > {code} > I think the summary should be extended to: > {code} > INFO : Query Execution Summary > INFO : > ---------------------------------------------------------------------------------------------- > INFO : OPERATION DURATION > INFO : > ---------------------------------------------------------------------------------------------- > INFO : Compile Query 0.47s > INFO : Prepare Plan 0.15s > INFO : Get Query Coordinator (AM) 0.07s > INFO : Submit Plan 0.01s > INFO : Start DAG 0.01s > INFO : Run DAG 1616.78s > INFO : Move Files 1000s > INFO : > ---------------------------------------------------------------------------------------------- > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)