[ https://issues.apache.org/jira/browse/HIVE-25364?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stamatis Zampetakis updated HIVE-25364: --------------------------------------- Summary: NPE while estimating row count in external JDBC tables (was: Null Pointer Exception while estimating row count in external tables.) > NPE while estimating row count in external JDBC tables > ------------------------------------------------------ > > Key: HIVE-25364 > URL: https://issues.apache.org/jira/browse/HIVE-25364 > Project: Hive > Issue Type: Bug > Reporter: Soumyakanti Das > Assignee: Soumyakanti Das > Priority: Major > > Running the query below for external tables produces NPE because of missing > APIs to handle JDBC Converter and JdbcHiveTableScan in > [RelMdDistinctRowCount.java|https://github.com/apache/calcite/blob/master/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java] > (calcite). The catch-all method > [getDistinctRowCount|https://github.com/apache/calcite/blob/master/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java#L76] > returns null for HiveJdbcConverter and JdbcHiveTableScan, which ultimately > results in a null value for *computeInnerJoinSelectivity(j, mq, predicate)* > method at > [HiveRelMdSelectivity.java|https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java#L78] > {code:java} > double innerJoinSelectivity = computeInnerJoinSelectivity(j, mq, predicate); > {code} > Query: > {code:java} > explain cbo > with t1 as (select fkey, ikey, sum(dkey) as dk_sum, sum(dkey2) as dk2_sum > from ext_simple_derby_table1 left join ext_simple_derby_table3 > on ikey = ikey2 > where fkey2 is null > group by fkey, ikey), > t2 as (select datekey, fkey, ikey, sum(dkey) as dk_sum2, sum(dkey2) as > dk2_sum2 > from ext_simple_derby_table2 left join ext_simple_derby_table4 > on ikey = ikey2 > where fkey2 is null > group by datekey, fkey, ikey) > select t1.fkey, t2.ikey, sum(t1.ikey) > from t1 left join t2 > on t1.ikey = t2.ikey AND t1.fkey = t2.fkey > where t2.fkey is null > group by t2.datekey, t1.fkey, t2.ikey > {code} > The stacktrace: > {code:java} > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity.getSelectivity(HiveRelMdSelectivity.java:78) > at GeneratedMetadataHandler_Selectivity.getSelectivity_$(Unknown Source) > at GeneratedMetadataHandler_Selectivity.getSelectivity(Unknown Source) > at GeneratedMetadataHandler_Selectivity.getSelectivity_$(Unknown Source) > at GeneratedMetadataHandler_Selectivity.getSelectivity(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getSelectivity(RelMetadataQuery.java:426) > at > org.apache.calcite.rel.metadata.RelMdUtil.estimateFilteredRows(RelMdUtil.java:765) > at > org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(RelMdRowCount.java:131) > at > org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount.getRowCount(HiveRelMdRowCount.java:175) > at > org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount.getRowCount(HiveRelMdRuntimeRowCount.java:53) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getRowCount(RelMetadataQuery.java:212) > at > org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(RelMdRowCount.java:205) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getRowCount(RelMetadataQuery.java:212) > at > org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(RelMdRowCount.java:140) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getRowCount(RelMetadataQuery.java:212) > at > org.apache.calcite.rel.metadata.RelMdUtil.getJoinRowCount(RelMdUtil.java:723) > at org.apache.calcite.rel.core.Join.estimateRowCount(Join.java:205) > at > org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount.getRowCount(HiveRelMdRowCount.java:113) > at GeneratedMetadataHandler_RowCount.getRowCount_$(Unknown Source) > at GeneratedMetadataHandler_RowCount.getRowCount(Unknown Source) > at > org.apache.calcite.rel.metadata.RelMetadataQuery.getRowCount(RelMetadataQuery.java:212) > at > org.apache.hadoop.hive.ql.optimizer.calcite.stats.FilterSelectivityEstimator.<init>(FilterSelectivityEstimator.java:62) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortPredicates$RexSortPredicatesShuttle.<init>(HiveFilterSortPredicates.java:126) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortPredicates$RexSortPredicatesShuttle.<init>(HiveFilterSortPredicates.java:120) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSortPredicates.onMatch(HiveFilterSortPredicates.java:91) > at > org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:333) > at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:542) > at > org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:407) > at > org.apache.calcite.plan.hep.HepPlanner.executeInstruction(HepPlanner.java:243) > at > org.apache.calcite.plan.hep.HepInstruction$RuleInstance.execute(HepInstruction.java:127) > at > org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:202) > at > org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:189) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2440) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.executeProgram(CalcitePlanner.java:2406) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.applyPostJoinOrderingTransform(CalcitePlanner.java:2326) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1735) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.apply(CalcitePlanner.java:1588) > at > org.apache.calcite.tools.Frameworks.lambda$withPlanner$0(Frameworks.java:131) > at > org.apache.calcite.prepare.CalcitePrepareImpl.perform(CalcitePrepareImpl.java:914) > at org.apache.calcite.tools.Frameworks.withPrepare(Frameworks.java:180) > at org.apache.calcite.tools.Frameworks.withPlanner(Frameworks.java:126) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.logicalPlan(CalcitePlanner.java:1340) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:559) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12513) > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:452) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317) > at > org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:175) > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:317) > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:223) > at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:500) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:453) > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:417) > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:411) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:125) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:229) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:256) > at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:201) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:127) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:422) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:353) > at > org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:744) > at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:714) > at > org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:170) > at > org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157) > at > org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at org.junit.runners.Suite.runChild(Suite.java:128) > at org.junit.runners.Suite.runChild(Suite.java:27) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at > org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95) > at org.junit.rules.RunRules.evaluate(RunRules.java:20) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at > org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238) > at > org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159) > at > org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377) > at > org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138) > at > org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465) > at > org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)