[ 
https://issues.apache.org/jira/browse/HIVE-24671?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HIVE-24671:
----------------------------------
    Labels: pull-request-available  (was: )

> Semijoin removal should not run into an NPE in case the SJ filter contains a 
> UDF
> --------------------------------------------------------------------------------
>
>                 Key: HIVE-24671
>                 URL: https://issues.apache.org/jira/browse/HIVE-24671
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Zoltan Haindrich
>            Assignee: Zoltan Haindrich
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> {code}
> set hive.optimize.index.filter=true;
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> set hive.exec.dynamic.partition.mode=nonstrict;
> set hive.exec.dynamic.partition=true;
> set hive.vectorized.execution.enabled=true;
> drop table if exists t1;
> drop table if exists t2;
> create table t1 (
>         v1 string
> );
> create table t2 (
>         v2 string
> );
> insert into t1 values ('e123456789'),('x123456789');
> insert into t2 values
> ('123'),
>  ('e123456789');
> -- alter table t1 update statistics set ('numRows'='9348843574','rawDataSize'='0');
> alter table t1 update statistics set 
> ('numRows'='934884357','rawDataSize'='0');
> alter table t2 update statistics set ('numRows'='9348','rawDataSize'='0');
> alter table t1 update statistics for column v1 set 
> ('numNulls'='0','numDVs'='15541355','avgColLen'='10.0','maxColLen'='10');
> alter table t2 update statistics for column v2 set 
> ('numNulls'='0','numDVs'='155','avgColLen'='5.0','maxColLen'='10');
> -- alter table t2 update statistics for column k set ('numNulls'='0','numDVs'='13876472','avgColLen'='15.9836','maxColLen'='16');
> explain
> select v1,v2 from t1 join t2 on (substr(v1,1,3) = v2);
> {code}
> results in:
> {code}
>  java.lang.NullPointerException
>       at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.removeSemijoinOptimizationByBenefit(TezCompiler.java:1944)
>       at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.semijoinRemovalBasedTransformations(TezCompiler.java:544)
>       at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:240)
>       at 
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:161)
>       at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:12467)
>       at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12672)
>       at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:455)
>       at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
>       at 
> org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171)
> [...]
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to