[ https://issues.apache.org/jira/browse/HIVE-20970?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17290143#comment-17290143 ]
Ashish Doneriya commented on HIVE-20970: ---------------------------------------- This is working fine in Hive 3.1.2. > ORC table with bloom filter fails on PPD query > ---------------------------------------------- > > Key: HIVE-20970 > URL: https://issues.apache.org/jira/browse/HIVE-20970 > Project: Hive > Issue Type: Bug > Components: File Formats, Hive, ORC > Affects Versions: 2.1.0 > Reporter: Gabriel C Balan > Priority: Minor > > I encountered this issue in hive2.1.0-cdh6.0.0. > {noformat:title=Reproducer} > drop table if exists t1; > create table t1(c1 string, c2 int) stored as orc > TBLPROPERTIES ("orc.compress"="NONE", > "orc.bloom.filter.columns"="c2"); > INSERT INTO TABLE t1 VALUES ("row 1", 1), ("row 2", 2), ("row 3", 3); > --this works fine > set hive.optimize.index.filter=false; > select * from t1 where c2=2; > --this fails > set hive.optimize.index.filter=true; > select * from t1 where c2=2; > {noformat} > These three items are essential to reproducing the issue: > # hive.optimize.index.filter=true; > # "orc.compress"="NONE" in TBLPROPERTIES > # "orc.bloom.filter.columns"="c2" in TBLPROPERTIES > That is, if any of the above-mentioned items is taken out, the query will > no longer fail. > Finally, here is the stack trace: > {noformat:title=Stack trace in log4j file} > java.io.IOException: java.lang.IllegalStateException: > InputStream#read(byte[]) returned invalid result: 0 > The InputStream implementation is buggy. 
> at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2188) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:187) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:409) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:838) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:774) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:701) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at org.apache.hadoop.util.RunJar.run(RunJar.java:313) > at org.apache.hadoop.util.RunJar.main(RunJar.java:227) > Caused by: java.lang.IllegalStateException: InputStream#read(byte[]) returned > invalid result: 0 > The InputStream implementation is buggy. 
> at > com.google.protobuf.CodedInputStream.refillBuffer(CodedInputStream.java:739) > at > com.google.protobuf.CodedInputStream.isAtEnd(CodedInputStream.java:701) > at > com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:99) > at org.apache.orc.OrcProto$RowIndex.<init>(OrcProto.java:7429) > at org.apache.orc.OrcProto$RowIndex.<init>(OrcProto.java:7393) > at > org.apache.orc.OrcProto$RowIndex$1.parsePartialFrom(OrcProto.java:7482) > at > org.apache.orc.OrcProto$RowIndex$1.parsePartialFrom(OrcProto.java:7477) > at > com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200) > at > com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:217) > at > com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:223) > at > com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49) > at org.apache.orc.OrcProto$RowIndex.parseFrom(OrcProto.java:7593) > at > org.apache.orc.impl.RecordReaderUtils$DefaultDataReader.readRowIndex(RecordReaderUtils.java:138) > at > org.apache.orc.impl.RecordReaderImpl.readRowIndex(RecordReaderImpl.java:1151) > at > org.apache.orc.impl.RecordReaderImpl.readRowIndex(RecordReaderImpl.java:1134) > at > org.apache.orc.impl.RecordReaderImpl.pickRowGroups(RecordReaderImpl.java:800) > at > org.apache.orc.impl.RecordReaderImpl.readStripe(RecordReaderImpl.java:830) > at > org.apache.orc.impl.RecordReaderImpl.advanceStripe(RecordReaderImpl.java:986) > at > org.apache.orc.impl.RecordReaderImpl.advanceToNextRow(RecordReaderImpl.java:1021) > at > org.apache.orc.impl.RecordReaderImpl.<init>(RecordReaderImpl.java:215) > at > org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:63) > at > org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:87) > at > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.createReaderFromFile(OrcInputFormat.java:314) > at > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$OrcRecordReader.<init>(OrcInputFormat.java:225) > at > 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:1691) > at > org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:695) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:333) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459) > ... 15 more > {noformat} > -- This message was sent by Atlassian Jira (v8.3.4#803005)