[ https://issues.apache.org/jira/browse/HIVE-24671?focusedWorklogId=542050&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-542050 ]
ASF GitHub Bot logged work on HIVE-24671:
-----------------------------------------

Author: ASF GitHub Bot
Created on: 26/Jan/21 07:22
Start Date: 26/Jan/21 07:22
Worklog Time Spent: 10m
Work Description: kasakrisz merged pull request #1901:
URL: https://github.com/apache/hive/pull/1901

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Issue Time Tracking
-------------------

Worklog Id: (was: 542050)
Time Spent: 0.5h (was: 20m)

> Semijoinremoval should not run into an NPE in case the SJ filter contains an
> UDF
> --------------------------------------------------------------------------------
>
> Key: HIVE-24671
> URL: https://issues.apache.org/jira/browse/HIVE-24671
> Project: Hive
> Issue Type: Bug
> Reporter: Zoltan Haindrich
> Assignee: Zoltan Haindrich
> Priority: Major
> Labels: pull-request-available
> Time Spent: 0.5h
> Remaining Estimate: 0h
>
> {code}
> set hive.optimize.index.filter=true;
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> set hive.exec.dynamic.partition.mode=nonstrict;
> set hive.exec.dynamic.partition=true;
> set hive.vectorized.execution.enabled=true;
> drop table if exists t1;
> drop table if exists t2;
> create table t1 (
> v1 string
> );
> create table t2 (
> v2 string
> );
> insert into t1 values ('e123456789'),('x123456789');
> insert into t2 values
> ('123'),
> ('e123456789');
> -- alter table t1 update statistics set
> ('numRows'='9348843574','rawDataSize'='0');
> alter table t1 update statistics set
> ('numRows'='934884357','rawDataSize'='0');
> alter table t2 update statistics set ('numRows'='9348','rawDataSize'='0');
> alter table t1 update statistics for column v1 set
> ('numNulls'='0','numDVs'='15541355','avgColLen'='10.0','maxColLen'='10');
> alter table t2 update statistics for column v2 set
> ('numNulls'='0','numDVs'='155','avgColLen'='5.0','maxColLen'='10');
> -- alter table t2 update statistics for column k set
> ('numNulls'='0','numDVs'='13876472','avgColLen'='15.9836','maxColLen'='16');
> explain
> select v1,v2 from t1 join t2 on (substr(v1,1,3) = v2);
> {code}
> results in:
> {code}
> java.lang.NullPointerException
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.removeSemijoinOptimizationByBenefit(TezCompiler.java:1944)
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.semijoinRemovalBasedTransformations(TezCompiler.java:544)
> at
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:240)
> at
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:161)
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:12467)
> at
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12672)
> at
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:455)
> at
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
> at
> org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171)
> [...]
> {code}

--
This message was sent by Atlassian Jira
(v8.3.4#803005)