[ https://issues.apache.org/jira/browse/HIVE-14363?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hari Sankar Sivarama Subramaniyan updated HIVE-14363: ----------------------------------------------------- Attachment: HIVE-14363.final.patch Adding comments, the issue was reproduced only in cluster so I havent put a q file test case. I am not running the unit tests again for just adding comments. > bucketmap inner join query fails due to NullPointerException in some cases > -------------------------------------------------------------------------- > > Key: HIVE-14363 > URL: https://issues.apache.org/jira/browse/HIVE-14363 > Project: Hive > Issue Type: Bug > Affects Versions: 2.2.0 > Reporter: Jagruti Varia > Assignee: Hari Sankar Sivarama Subramaniyan > Attachments: HIVE-14363.1.patch, HIVE-14363.final.patch > > > Bucketmap inner join query between bucketed tables throws following exception > when one table contains all the empty buckets while other has all the > non-empty buckets. > {noformat} > Vertex failed, vertexName=Map 2, vertexId=vertex_1466710232033_0432_4_01, > diagnostics=[Task failed, taskId=task_1466710232033_0432_4_01_000000, > diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( > failure ) : > attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 > killedTasks:59, Vertex vertex_1466710232033_0432_4_01 [Map 2] killed/failed > due to:OWN_TASK_FAILURE] > DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0 > FAILED: Execution Error, return code 2 from > org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 2, > vertexId=vertex_1466710232033_0432_4_01, diagnostics=[Task failed, > taskId=task_1466710232033_0432_4_01_000000, diagnostics=[TaskAttempt 0 > failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_0:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_1:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 2 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_2:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > ], TaskAttempt 3 failed, info=[Error: Error while running task ( failure ) : > attempt_1466710232033_0432_4_01_000000_3:java.lang.RuntimeException: > java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:211) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:168) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:370) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.RuntimeException: Map operator initialization failed > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:330) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:184) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getKeyValueReader(MapRecordProcessor.java:372) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.initializeMapRecordSources(MapRecordProcessor.java:344) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:292) > ... 15 more > {noformat} > Steps to reproduce the issue: > {noformat} > set hive.execution.engine=tez; > set hive.exeucution.mode=container; > drop table if exists src_emptybucket; > create table src_emptybucket (name string, age int, gpa double) > clustered by (age) > into 30 buckets > stored as orc; > insert into table src_emptybucket > select * from studenttab10k limit 0; > drop table if exists src_nonemptybucket; > create table src_nonemptybucket (name varchar(50), age int, gpa > decimal(38,18)) > clustered by (age) > into 60 buckets > stored as orc; > insert into table src_nonemptybucket > select * from studenttab10k > where age in ( > select distinct(age) from studenttab10k > limit 60); > set hive.optimize.bucketmapjoin=true; > set hive.convert.join.bucket.mapjoin.tez=true; > select e1.name as e1_name, e1.age as e1_age, e1.gpa as e1_gpa, e2.name as > e2_name, e2.age as e2_age, e2.gpa as e2_gpa from src_emptybucket e1 inner > join src_nonemptybucket e2 on e1.age = e2.age; > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)