[ https://issues.apache.org/jira/browse/HIVE-24671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated HIVE-24671:
----------------------------------
    Labels: pull-request-available  (was: )

> Semijoinremoval should not run into an NPE in case the SJ filter contains an UDF
> --------------------------------------------------------------------------------
>
>                 Key: HIVE-24671
>                 URL: https://issues.apache.org/jira/browse/HIVE-24671
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> {code}
> set hive.optimize.index.filter=true;
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> set hive.exec.dynamic.partition.mode=nonstrict;
> set hive.exec.dynamic.partition=true;
> set hive.vectorized.execution.enabled=true;
> drop table if exists t1;
> drop table if exists t2;
> create table t1 (
> v1 string
> );
> create table t2 (
> v2 string
> );
> insert into t1 values ('e123456789'),('x123456789');
> insert into t2 values
> ('123'),
> ('e123456789');
> -- alter table t1 update statistics set ('numRows'='9348843574','rawDataSize'='0');
> alter table t1 update statistics set ('numRows'='934884357','rawDataSize'='0');
> alter table t2 update statistics set ('numRows'='9348','rawDataSize'='0');
> alter table t1 update statistics for column v1 set ('numNulls'='0','numDVs'='15541355','avgColLen'='10.0','maxColLen'='10');
> alter table t2 update statistics for column v2 set ('numNulls'='0','numDVs'='155','avgColLen'='5.0','maxColLen'='10');
> -- alter table t2 update statistics for column k set ('numNulls'='0','numDVs'='13876472','avgColLen'='15.9836','maxColLen'='16');
> explain
> select v1,v2 from t1 join t2 on (substr(v1,1,3) = v2);
> {code}
> results in:
> {code}
> java.lang.NullPointerException
> at org.apache.hadoop.hive.ql.parse.TezCompiler.removeSemijoinOptimizationByBenefit(TezCompiler.java:1944)
> at org.apache.hadoop.hive.ql.parse.TezCompiler.semijoinRemovalBasedTransformations(TezCompiler.java:544)
> at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:240)
> at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:161)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:12467)
> at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12672)
> at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:455)
> at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
> at org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171)
> [...]
> {code}

--
This message was sent by Atlassian Jira (v8.3.4#803005)