[ https://issues.apache.org/jira/browse/HIVE-18042?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hengyu Dai updated HIVE-18042: ------------------------------ Summary: Correlation Optimizer leads to NPE when there are multiple subquery (select distinct) union all operations after join (was: Correlation Optimizer lead to NPE when there is multi union all operation after join ) > Correlation Optimizer leads to NPE when there are multiple subquery (select > distinct) union all operations after join > -------------------------------------------------------------------------------------------------------------- > > Key: HIVE-18042 > URL: https://issues.apache.org/jira/browse/HIVE-18042 > Project: Hive > Issue Type: Bug > Components: Logical Optimizer > Affects Versions: 2.1.1 > Environment: > Reporter: Hengyu Dai > > Test SQL: > {code:sql} > SELECT DISTINCT a.logday AS push_day, a.mtype, a.t, If(b.msgid IS NULL, 'no', > 'yes') AS isnotdaoda, a.platform > , a.uid, a.dt > FROM (SELECT DISTINCT If(tokentype = '7', msgid, If(tokentype = '6', > regexp_extract(sendpushresult, 'msgId":"([^"]+)', 1), > regexp_extract(sendpushresult, 'msgId=(.+?),', 1))) AS msgid, logday, If(vid > LIKE '60%', 'adr', If(vid LIKE '8%', 'ios', 'other')) AS platform, mtype, t > , If(vid LIKE '8%', uid, gid) AS uid, concat(substr(logday, 1, 4), > '-', substr(logday, 5, 2), '-', substr(logday, 7, 2)) AS dt > FROM wirelessdata.orig_push_client > ) a > LEFT JOIN (SELECT DISTINCT msgid > FROM ( > SELECT DISTINCT msgid > FROM wirelessdata.orig_push_return > UNION ALL > SELECT DISTINCT msgid > FROM wirelessdata.orig_push_return_xiaomi > UNION ALL > SELECT DISTINCT regexp_extract(action, '"id":"([^"]+)', 1) AS > msgid > FROM wirelessdata.ods_client_behavior_hour4spark > ) bb > ) b ON lower(a.msgid) = lower(b.msgid) > {code} > The error stack: > {code:java} > 2017-11-10T16:01:21,123 ERROR [9b7d82f5-dfc8-43ac-8d6f-a019d8677392 main] > ql.Driver: FAILED: NullPointerException null > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.setUnionPlan(GenMapRedUtils.java:230) > at > 
org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils.joinUnionPlan(GenMapRedUtils.java:287) > at > org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3.process(GenMRRedSink3.java:100) > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:54) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.parse.GenMapRedWalker.walk(GenMapRedWalker.java:65) > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120) > at > org.apache.hadoop.hive.ql.parse.MapReduceCompiler.generateTaskTree(MapReduceCompiler.java:323) > at > org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:267) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:11008) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10547) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:250) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:483) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1254) > at 
org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1396) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1181) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1170) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:229) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:180) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:396) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:770) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:711) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:638) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at org.apache.hadoop.util.RunJar.main(RunJar.java:212) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)