[ https://issues.apache.org/jira/browse/HIVE-11113?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14627777#comment-14627777 ]
Bing Li commented on HIVE-11113: -------------------------------- Hi, [~pxiong] and [~shiroy], I tried this scenario on Hive 1.2.1 and found that it works for a table stored as TEXTFILE, but does NOT work for one stored as PARQUET. Errors ====== Caused by: java.lang.IllegalArgumentException: Column [ds] was not found in schema! at parquet.Preconditions.checkArgument(Preconditions.java:55) at parquet.filter2.predicate.SchemaCompatibilityValidator.getColumnDescriptor(SchemaCompatibilityValidator.java:190) at parquet.filter2.predicate.SchemaCompatibilityValidator.validateColumn(SchemaCompatibilityValidator.java:178) at parquet.filter2.predicate.SchemaCompatibilityValidator.validateColumnFilterPredicate(SchemaCompatibilityValidator.java:160) at parquet.filter2.predicate.SchemaCompatibilityValidator.visit(SchemaCompatibilityValidator.java:94) at parquet.filter2.predicate.SchemaCompatibilityValidator.visit(SchemaCompatibilityValidator.java:59) at parquet.filter2.predicate.Operators$Eq.accept(Operators.java:180) at parquet.filter2.predicate.SchemaCompatibilityValidator.validate(SchemaCompatibilityValidator.java:64) at parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:59) at parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:40) at parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:126) at parquet.filter2.compat.RowGroupFilter.filterRowGroups(RowGroupFilter.java:46) at org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.getSplit(ParquetRecordReaderWrapper.java:275) at org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:99) at org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:85) at org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:72) at 
org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:67) ... 16 more Reproduced Queries ================== create table dummy (key string, value string) partitioned by (ds string, hr string); load data local inpath 'kv1.txt' into table dummy partition (ds='2008',hr='12'); load data local inpath 'kv1.txt' into table dummy partition (ds='2008',hr='11'); select * from dummy; analyze table dummy partition (ds='2008',hr='12') compute statistics for columns key; create table dummy2 (key string, value string) partitioned by (ds string, hr string)stored as parquet; insert into table dummy2 partition (ds='2008',hr='12') select key, value from dummy where (ds='2008'); select * from dummy2; analyze table dummy2 partition(ds='2008') compute statistics for columns key; > ANALYZE TABLE .. COMPUTE STATISTICS FOR COLUMNS does not work. > --------------------------------------------------------------- > > Key: HIVE-11113 > URL: https://issues.apache.org/jira/browse/HIVE-11113 > Project: Hive > Issue Type: Bug > Affects Versions: 0.13.1, 1.2.1 > Environment: > Reporter: Shiroy Pigarez > Priority: Critical > > I was trying to perform some column statistics using hive as per the > documentation > https://cwiki.apache.org/confluence/display/Hive/Column+Statistics+in+Hive > and was encountering the following errors: > Seems like a bug. Can you look into this? Thanks in advance. 
> -- HIVE table > {noformat} > hive> create table people_part( > name string, > address string) PARTITIONED BY (dob string, nationality varchar(2)) > row format delimited fields terminated by '\t'; > {noformat} > --Analyze table with partition dob and nationality with FOR COLUMNS > {noformat} > hive> ANALYZE TABLE people_part PARTITION(dob='2015-10-2',nationality) > COMPUTE STATISTICS FOR COLUMNS; > NoViableAltException(-1@[]) > at > org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.identifier(HiveParser_IdentifiersParser.java:11627) > at > org.apache.hadoop.hive.ql.parse.HiveParser.identifier(HiveParser.java:40215) > at > org.apache.hadoop.hive.ql.parse.HiveParser.columnName(HiveParser.java:33351) > at > org.apache.hadoop.hive.ql.parse.HiveParser.columnNameList(HiveParser.java:33219) > at > org.apache.hadoop.hive.ql.parse.HiveParser.analyzeStatement(HiveParser.java:17764) > at > org.apache.hadoop.hive.ql.parse.HiveParser.ddlStatement(HiveParser.java:2369) > at > org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1398) > at > org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1036) > at > org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:199) > at > org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:404) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:322) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:975) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1040) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:911) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:901) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:275) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:227) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:430) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:803) > 
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:697) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:636) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.main(RunJar.java:212) > FAILED: ParseException line 1:95 cannot recognize input near '<EOF>' '<EOF>' > '<EOF>' in column name > {noformat} > --Analyze table with partition dob and nationality values specified with FOR > COLUMNS > {noformat} > hive> ANALYZE TABLE people_part PARTITION(dob='2015-10-2',nationality='IE') > COMPUTE STATISTICS FOR COLUMNS; > NoViableAltException(-1@[]) > at > org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.identifier(HiveParser_IdentifiersParser.java:11627) > at > org.apache.hadoop.hive.ql.parse.HiveParser.identifier(HiveParser.java:40215) > at > org.apache.hadoop.hive.ql.parse.HiveParser.columnName(HiveParser.java:33351) > at > org.apache.hadoop.hive.ql.parse.HiveParser.columnNameList(HiveParser.java:33219) > at > org.apache.hadoop.hive.ql.parse.HiveParser.analyzeStatement(HiveParser.java:17764) > at > org.apache.hadoop.hive.ql.parse.HiveParser.ddlStatement(HiveParser.java:2369) > at > org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1398) > at > org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1036) > at > org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:199) > at > org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:404) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:322) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:975) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1040) > at 
org.apache.hadoop.hive.ql.Driver.run(Driver.java:911) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:901) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:275) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:227) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:430) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:803) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:697) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:636) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.main(RunJar.java:212) > FAILED: ParseException line 1:100 cannot recognize input near '<EOF>' '<EOF>' > '<EOF>' in column name > {noformat} > --Analyze table with partition dob and nationality values specified with FOR > COLUMNS with column name specified > {noformat} > hive> ANALYZE TABLE people_part PARTITION(dob='2015-10-2',nationality='IE') > COMPUTE STATISTICS FOR COLUMNS name; > FAILED: SemanticException [Error 10004]: Line 1:92 Invalid table alias or > column reference 'IE': (possible column names are: name, address, dob, > nationality) > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)