[ https://issues.apache.org/jira/browse/HIVE-19286?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Oleksiy Sayankin updated HIVE-19286: ------------------------------------ Description: *STEP 1. Create test data* {code} DROP TABLE IF EXISTS customer_target; DROP TABLE IF EXISTS customer_source; {code} {code} CREATE TABLE customer_target (id STRING, first_name STRING, last_name STRING, age INT) clustered by (id) into 2 buckets stored as ORC TBLPROPERTIES ('transactional'='true'); {code} {code} insert into customer_target values ('001', 'John', 'Smith', 45), ('002', 'Michael', 'Watson', 27), ('003', 'Den', 'Brown', 33); SELECT id, first_name, last_name, age FROM customer_target; {code} {code} +------+-------------+------------+------+ | id | first_name | last_name | age | +------+-------------+------------+------+ | 002 | Michael | Watson | 27 | | 001 | John | Smith | 45 | | 003 | Den | Brown | 33 | +------+-------------+------------+------+ {code} {code} CREATE TABLE customer_source (id STRING, first_name STRING, last_name STRING, age INT); insert into customer_source values ('001', 'Dorothi', 'Hogward', 77), ('007', 'Alex', 'Bowee', 1), ('088', 'Robert', 'Dowson', 25); SELECT id, first_name, last_name, age FROM customer_source; {code} {code} +------+-------------+------------+------+ | id | first_name | last_name | age | +------+-------------+------------+------+ | 088 | Robert | Dowson | 25 | | 001 | Dorothi | Hogward | 77 | | 007 | Alex | Bowee | 1 | +------+-------------+------------+------+ {code} *STEP 2. Merge data* {code} merge into customer_target trg using customer_source src on src.id = trg.id when matched then update set first_name = src.first_name, last_name = src.last_name when not matched then insert values (src.id, src.first_name, src.last_name, src.age); {code} *ACTUAL RESULT* {code} 2018-04-24T07:11:44,448 DEBUG [main] log.PerfLogger: <PERFLOG method=deserializePlan from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> 2018-04-24T07:11:44,448 INFO [main] exec.SerializationUtilities: Deserializing MapredLocalWork using kryo 2018-04-24T07:11:44,463 DEBUG [main] exec.Utilities: Hive Conf not found or Session not initiated, use thread based class loader instead 2018-04-24T07:11:44,538 DEBUG [main] log.PerfLogger: </PERFLOG method=deserializePlan start=1524568304448 end=1524568304538 duration=90 from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> 2018-04-24T07:11:44,545 INFO [main] mr.MapredLocalTask: 2018-04-24 07:11:44 Starting to launch local task to process map join; maximum memory = 477626368 2018-04-24T07:11:44,545 DEBUG [main] mr.MapredLocalTask: initializeOperators: trg, children = [HASHTABLESINK[37]] 2018-04-24T07:11:44,656 DEBUG [main] exec.Utilities: Hive Conf not found or Session not initiated, use thread based class loader instead 2018-04-24T07:11:44,676 INFO [main] mr.MapredLocalTask: fetchoperator for trg created 2018-04-24T07:11:44,676 INFO [main] exec.TableScanOperator: Initializing operator TS[0] 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initialization Done 0 TS 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Operator 0 TS initialized 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initializing children of 0 TS 2018-04-24T07:11:44,676 DEBUG [main] exec.HashTableSinkOperator: Initializing child 37 HASHTABLESINK 2018-04-24T07:11:44,676 INFO [main] exec.HashTableSinkOperator: Initializing operator HASHTABLESINK[37] 2018-04-24T07:11:44,677 INFO [main] mapjoin.MapJoinMemoryExhaustionHandler: JVM Max Heap Size: 477626368 2018-04-24T07:11:44,680 ERROR [main] mr.MapredLocalTask: Hive Runtime Error: Map local work failed java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:57) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.JoinUtil.getObjectInspectorsFromEvaluators(JoinUtil.java:91) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.initializeOp(HashTableSinkOperator.java:153) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:366) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:556) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:508) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.initializeOperators(MapredLocalTask.java:508) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.startForward(MapredLocalTask.java:411) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.executeInProcess(MapredLocalTask.java:391) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:764) ~[hive-exec-2.3.3.jar:2.3.3] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_161] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_161] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_161] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_161] at org.apache.hadoop.util.RunJar.run(RunJar.java:221) ~[hadoop-common-2.7.2.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:136) ~[hadoop-common-2.7.2.jar:?] {code} was: {code} DROP TABLE IF EXISTS customer_target; DROP TABLE IF EXISTS customer_source; {code} {code} CREATE TABLE customer_target (id STRING, first_name STRING, last_name STRING, age INT) clustered by (id) into 2 buckets stored as ORC TBLPROPERTIES ('transactional'='true'); {code} {code} insert into customer_target values ('001', 'John', 'Smith', 45), ('002', 'Michael', 'Watson', 27), ('003', 'Den', 'Brown', 33); SELECT id, first_name, last_name, age FROM customer_target; {code} {code} +------+-------------+------------+------+ | id | first_name | last_name | age | +------+-------------+------------+------+ | 002 | Michael | Watson | 27 | | 001 | John | Smith | 45 | | 003 | Den | Brown | 33 | +------+-------------+------------+------+ {code} {code} CREATE TABLE customer_source (id STRING, first_name STRING, last_name STRING, age INT); insert into customer_source values ('001', 'Dorothi', 'Hogward', 77), ('007', 'Alex', 'Bowee', 1), ('088', 'Robert', 'Dowson', 25); SELECT id, first_name, last_name, age FROM customer_source; {code} {code} +------+-------------+------------+------+ | id | first_name | last_name | age | +------+-------------+------------+------+ | 088 | Robert | Dowson | 25 | | 001 | Dorothi | Hogward | 77 | | 007 | Alex | Bowee | 1 | +------+-------------+------------+------+ {code} {code} merge into customer_target trg using customer_source src on src.id = trg.id when matched then update set first_name = src.first_name, last_name = src.last_name when not matched then insert values (src.id, src.first_name, src.last_name, src.age); {code} {code} 2018-04-24T07:11:44,448 DEBUG [main] log.PerfLogger: <PERFLOG method=deserializePlan from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> 2018-04-24T07:11:44,448 INFO [main] exec.SerializationUtilities: Deserializing MapredLocalWork using kryo 2018-04-24T07:11:44,463 DEBUG [main] exec.Utilities: Hive Conf not found or Session not initiated, use thread based class loader instead 2018-04-24T07:11:44,538 DEBUG [main] log.PerfLogger: </PERFLOG method=deserializePlan start=1524568304448 end=1524568304538 duration=90 from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> 2018-04-24T07:11:44,545 INFO [main] mr.MapredLocalTask: 2018-04-24 07:11:44 Starting to launch local task to process map join; maximum memory = 477626368 2018-04-24T07:11:44,545 DEBUG [main] mr.MapredLocalTask: initializeOperators: trg, children = [HASHTABLESINK[37]] 2018-04-24T07:11:44,656 DEBUG [main] exec.Utilities: Hive Conf not found or Session not initiated, use thread based class loader instead 2018-04-24T07:11:44,676 INFO [main] mr.MapredLocalTask: fetchoperator for trg created 2018-04-24T07:11:44,676 INFO [main] exec.TableScanOperator: Initializing operator TS[0] 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initialization Done 0 TS 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Operator 0 TS initialized 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initializing children of 0 TS 2018-04-24T07:11:44,676 DEBUG [main] exec.HashTableSinkOperator: Initializing child 37 HASHTABLESINK 2018-04-24T07:11:44,676 INFO [main] exec.HashTableSinkOperator: Initializing operator HASHTABLESINK[37] 2018-04-24T07:11:44,677 INFO [main] mapjoin.MapJoinMemoryExhaustionHandler: JVM Max Heap Size: 477626368 2018-04-24T07:11:44,680 ERROR [main] mr.MapredLocalTask: Hive Runtime Error: Map local work failed java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:57) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.JoinUtil.getObjectInspectorsFromEvaluators(JoinUtil.java:91) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.initializeOp(HashTableSinkOperator.java:153) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:366) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:556) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:508) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.initializeOperators(MapredLocalTask.java:508) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.startForward(MapredLocalTask.java:411) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.executeInProcess(MapredLocalTask.java:391) ~[hive-exec-2.3.3.jar:2.3.3] at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:764) ~[hive-exec-2.3.3.jar:2.3.3] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_161] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_161] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_161] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_161] at org.apache.hadoop.util.RunJar.run(RunJar.java:221) ~[hadoop-common-2.7.2.jar:?] at org.apache.hadoop.util.RunJar.main(RunJar.java:136) ~[hadoop-common-2.7.2.jar:?] {code} > NPE in MERGE operator on MR mode > -------------------------------- > > Key: HIVE-19286 > URL: https://issues.apache.org/jira/browse/HIVE-19286 > Project: Hive > Issue Type: Bug > Reporter: Oleksiy Sayankin > Assignee: Oleksiy Sayankin > Priority: Blocker > > *STEP 1. Create test data* > {code} > DROP TABLE IF EXISTS customer_target; > DROP TABLE IF EXISTS customer_source; > {code} > {code} > CREATE TABLE customer_target (id STRING, first_name STRING, last_name STRING, > age INT) clustered by (id) into 2 buckets stored as ORC TBLPROPERTIES > ('transactional'='true'); > {code} > {code} > insert into customer_target values ('001', 'John', 'Smith', 45), ('002', > 'Michael', 'Watson', 27), ('003', 'Den', 'Brown', 33); > SELECT id, first_name, last_name, age FROM customer_target; > {code} > {code} > +------+-------------+------------+------+ > | id | first_name | last_name | age | > +------+-------------+------------+------+ > | 002 | Michael | Watson | 27 | > | 001 | John | Smith | 45 | > | 003 | Den | Brown | 33 | > +------+-------------+------------+------+ > {code} > {code} > CREATE TABLE customer_source (id STRING, first_name STRING, last_name STRING, > age INT); > insert into customer_source values ('001', 'Dorothi', 'Hogward', 77), ('007', > 'Alex', 'Bowee', 1), ('088', 'Robert', 'Dowson', 25); > SELECT id, first_name, last_name, age FROM customer_source; > {code} > {code} > +------+-------------+------------+------+ > | id | first_name | last_name | age | > +------+-------------+------------+------+ > | 088 | Robert | Dowson | 25 | > | 001 | Dorothi | Hogward | 77 | > | 007 | Alex | Bowee | 1 | > +------+-------------+------------+------+ > {code} > *STEP 2. Merge data* > {code} > merge into customer_target trg using customer_source src on src.id = trg.id > when matched then update set first_name = src.first_name, last_name = > src.last_name when not matched then insert values (src.id, src.first_name, > src.last_name, src.age); > {code} > *ACTUAL RESULT* > {code} > 2018-04-24T07:11:44,448 DEBUG [main] log.PerfLogger: <PERFLOG > method=deserializePlan > from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> > 2018-04-24T07:11:44,448 INFO [main] exec.SerializationUtilities: > Deserializing MapredLocalWork using kryo > 2018-04-24T07:11:44,463 DEBUG [main] exec.Utilities: Hive Conf not found or > Session not initiated, use thread based class loader instead > 2018-04-24T07:11:44,538 DEBUG [main] log.PerfLogger: </PERFLOG > method=deserializePlan start=1524568304448 end=1524568304538 duration=90 > from=org.apache.hadoop.hive.ql.exec.SerializationUtilities> > 2018-04-24T07:11:44,545 INFO [main] mr.MapredLocalTask: 2018-04-24 07:11:44 > Starting to launch local task to process map join; maximum memory = > 477626368 > 2018-04-24T07:11:44,545 DEBUG [main] mr.MapredLocalTask: initializeOperators: > trg, children = [HASHTABLESINK[37]] > 2018-04-24T07:11:44,656 DEBUG [main] exec.Utilities: Hive Conf not found or > Session not initiated, use thread based class loader instead > 2018-04-24T07:11:44,676 INFO [main] mr.MapredLocalTask: fetchoperator for > trg created > 2018-04-24T07:11:44,676 INFO [main] exec.TableScanOperator: Initializing > operator TS[0] > 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initialization > Done 0 TS > 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Operator 0 TS > initialized > 2018-04-24T07:11:44,676 DEBUG [main] exec.TableScanOperator: Initializing > children of 0 TS > 2018-04-24T07:11:44,676 DEBUG [main] exec.HashTableSinkOperator: Initializing > child 37 HASHTABLESINK > 2018-04-24T07:11:44,676 INFO [main] exec.HashTableSinkOperator: Initializing > operator HASHTABLESINK[37] > 2018-04-24T07:11:44,677 INFO [main] mapjoin.MapJoinMemoryExhaustionHandler: > JVM Max Heap Size: 477626368 > 2018-04-24T07:11:44,680 ERROR [main] mr.MapredLocalTask: Hive Runtime Error: > Map local work failed > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:57) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.JoinUtil.getObjectInspectorsFromEvaluators(JoinUtil.java:91) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.HashTableSinkOperator.initializeOp(HashTableSinkOperator.java:153) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:366) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:556) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:508) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.initializeOperators(MapredLocalTask.java:508) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.startForward(MapredLocalTask.java:411) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.executeInProcess(MapredLocalTask.java:391) > ~[hive-exec-2.3.3.jar:2.3.3] > at > org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:764) > ~[hive-exec-2.3.3.jar:2.3.3] > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > ~[?:1.8.0_161] > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > ~[?:1.8.0_161] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_161] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_161] > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > ~[hadoop-common-2.7.2.jar:?] > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > ~[hadoop-common-2.7.2.jar:?] > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)