[ https://issues.apache.org/jira/browse/HIVE-14773?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Nita Dembla updated HIVE-14773:
-------------------------------
Description:
Hive runs into an NPE when a query has a filter on both a date column and a partition column, e.g.:

select count(*) from date_dim where d_date > date "1900-01-02" and d_date_sk = 2416945;

Here d_date_sk is a partition column and d_date is of type date.

{code}
2016-09-16T08:27:06,510 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.AggregateStatsCache: No aggregate stats cached for database:default, table:date_dim, column:d_date
2016-09-16T08:27:06,512 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.MetaStoreDirectSql: Direct SQL query in 1.302231ms + 0.00653ms, the query is [select "COLUMN_NAME", "COLUMN_TYPE", min("LONG_LOW_VALUE"), max("LONG_HIGH_VALUE"), min("DOUBLE_LOW_VALUE"), max("DOUBLE_HIGH_VALUE"), min(cast("BIG_DECIMAL_LOW_VALUE" as decimal)), max(cast("BIG_DECIMAL_HIGH_VALUE" as decimal)), sum("NUM_NULLS"), max("NUM_DISTINCTS"), max("AVG_COL_LEN"), max("MAX_COL_LEN"), sum("NUM_TRUES"), sum("NUM_FALSES"), avg(("LONG_HIGH_VALUE"-"LONG_LOW_VALUE")/cast("NUM_DISTINCTS" as decimal)),avg(("DOUBLE_HIGH_VALUE"-"DOUBLE_LOW_VALUE")/"NUM_DISTINCTS"),avg((cast("BIG_DECIMAL_HIGH_VALUE" as decimal)-cast("BIG_DECIMAL_LOW_VALUE" as decimal))/"NUM_DISTINCTS"),sum("NUM_DISTINCTS") from "PART_COL_STATS" where "DB_NAME" = ? and "TABLE_NAME" = ? and "COLUMN_NAME" in (?) and "PARTITION_NAME" in (?) group by "COLUMN_NAME", "COLUMN_TYPE"]
2016-09-16T08:27:06,526 INFO  [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.MetaStoreDirectSql: useDensityFunctionForNDVEstimation = false
partsFound = 1
ColumnStatisticsObj = [ColumnStatisticsObj(colName:d_date, colType:date, statsData:<ColumnStatisticsData >)]
2016-09-16T08:27:06,526 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.ObjectStore: Commit transaction: count = 0, isactive true at: org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.commit(ObjectStore.java:2827)
2016-09-16T08:27:06,531 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.ObjectStore: null retrieved using SQL in 43.425925ms
2016-09-16T08:27:06,545 ERROR [90d4780f-77e4-4704-9907-4860ce11a206 main] ql.Driver: FAILED: NullPointerException null
java.lang.NullPointerException
        at org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.getFieldDesc(ColumnStatisticsData.java:451)
        at org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.getDateStats(ColumnStatisticsData.java:574)
        at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatistics(StatsUtils.java:759)
        at org.apache.hadoop.hive.ql.stats.StatsUtils.convertColStats(StatsUtils.java:806)
        at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:304)
        at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:152)
        at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:140)
        at org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:126)
        at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
        at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
        at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
        at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:143)
        at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122)
        at org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78)
        at org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:260)
        at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:129)
        at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:140)
        at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10928)
        at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:255)
        at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:251)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:467)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:342)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1235)
        at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1355)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1143)
        at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1131)
        at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:400)
        at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:777)
        at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:715)
        at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:642)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.hadoop.util.RunJar.run(RunJar.java:233)
        at org.apache.hadoop.util.RunJar.main(RunJar.java:148)
{code}
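The first two frames point at the Thrift-generated union type: the aggregation above returned statsData:<ColumnStatisticsData > with no field set, and calling getDateStats() on an unset union fails inside getFieldDesc(). A minimal standalone sketch of that failure mode (the demo class name is made up, and the setter/isSet-style accessors are the usual Thrift-generated ones, assumed here rather than taken from this report):

{code}
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;

// Illustration only, not Hive code: why getDateStats() NPEs when the
// aggregated ColumnStatisticsData union was never populated.
public class EmptyStatsUnionDemo {
  public static void main(String[] args) {
    // A populated union, as the metastore returns when date stats were aggregated.
    DateColumnStatsData dateStats = new DateColumnStatsData();
    dateStats.setNumNulls(0L);
    dateStats.setNumDVs(1L);
    ColumnStatisticsData populated = new ColumnStatisticsData();
    populated.setDateStats(dateStats);
    System.out.println(populated.getDateStats().getNumDVs());   // prints 1

    // The log above shows statsData:<ColumnStatisticsData > with no field set;
    // asking that union for date stats dies in getFieldDesc(), as in the trace.
    ColumnStatisticsData empty = new ColumnStatisticsData();
    empty.getDateStats();                                        // NullPointerException
  }
}
{code}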
group by "COLUMN_NAME", "COLUMN_TYPE"] 2016-09-16T08:27:06,526 INFO [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.MetaStoreDirectSql: useDensityFunctionForNDVEstimation = false partsFound = 1 ColumnStatisticsObj = [ColumnStatisticsObj(colName:d_date, colType:date, statsData:<ColumnStatisticsData >)] 2016-09-16T08:27:06,526 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.ObjectStore: Commit transaction: count = 0, isactive true at: org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.commit(ObjectStore.java:2827) 2016-09-16T08:27:06,531 DEBUG [90d4780f-77e4-4704-9907-4860ce11a206 main] metastore.ObjectStore: null retrieved using SQL in 43.425925ms 2016-09-16T08:27:06,545 ERROR [90d4780f-77e4-4704-9907-4860ce11a206 main] ql.Driver: FAILED: NullPointerException null java.lang.NullPointerException at org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.getFieldDesc(ColumnStatisticsData.java:451) at org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.getDateStats(ColumnStatisticsData.java:574) at org.apache.hadoop.hive.ql.stats.StatsUtils.getColStatistics(StatsUtils.java:759) at org.apache.hadoop.hive.ql.stats.StatsUtils.convertColStats(StatsUtils.java:806) at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:304) at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:152) at org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:140) at org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:126) at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89) at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:143) at org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:122) at org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:78) at org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:260) at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:129) at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:140) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:10928) at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:255) at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:251) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:467) at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:342) at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1235) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1355) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1143) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1131) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:400) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:777) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:715) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:642) at 
> NPE aggregating column statistics for date column in partitioned table
> ----------------------------------------------------------------------
>
>                 Key: HIVE-14773
>                 URL: https://issues.apache.org/jira/browse/HIVE-14773
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>    Affects Versions: 2.2.0
>            Reporter: Nita Dembla
>

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)