[ https://issues.apache.org/jira/browse/HIVE-14650?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
tartarus updated HIVE-14650: ---------------------------- Description: When SELECTing from a Hive ORC table, the following IndexOutOfBoundsException is thrown if the underlying ORC file has at least 4 more columns than the Hive schema (where N is the number of columns in the ORC file). {noformat} Failed with exception java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N 16/08/25 15:22:19 ERROR CliDriver: Failed with exception java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N java.io.IOException: java.lang.IndexOutOfBoundsException: toIndex = N at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:507) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:414) at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:140) at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:1686) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:165) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:736) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:681) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:621) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Caused by: java.lang.IndexOutOfBoundsException: toIndex = 6 at java.util.ArrayList.subListRangeCheck(ArrayList.java:1004) at java.util.ArrayList.subList(ArrayList.java:996) at org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.getSchemaOnRead(RecordReaderFactory.java:161) at 
org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.createTreeReader(RecordReaderFactory.java:66) at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:202) at org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:541) at org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$ReaderPair.<init>(OrcRawRecordMerger.java:183) at org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$OriginalReaderPair.<init>(OrcRawRecordMerger.java:226) at org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.<init>(OrcRawRecordMerger.java:437) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getReader(OrcInputFormat.java:1216) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:1113) at org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:673) at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:323) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:445) ... 15 more {noformat} This error appears to be related to the patch of HIVE-10591. Steps to reproduce (Hive QL): {noformat} DROP TABLE IF EXISTS orc_drop_column; CREATE TABLE orc_drop_column (`id` int, `name` string, `description` string, `somevalue` double, `someflag` boolean, `somedate` timestamp) STORED AS ORC; INSERT INTO TABLE orc_drop_column select * from (select 1, 'my_name', 'my_desc', 5.5, true, '2016-08-25 06:00:00') a; ALTER TABLE orc_drop_column SET SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; ALTER TABLE orc_drop_column REPLACE COLUMNS ( `id` int, `name` string ); ALTER TABLE orc_drop_column SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; SELECT id, name FROM orc_drop_column; {noformat} was: When SELECTing from a Hive ORC table, the following IndexOutOfBoundsException is thrown if the underlying ORC file has 4 or more columns than the Hive schema (where N is the number of columns in the ORC file). 
{noformat} Failed with exception java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N 16/08/25 15:22:19 ERROR CliDriver: Failed with exception java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N java.io.IOException: java.lang.IndexOutOfBoundsException: toIndex = N at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:507) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:414) at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:140) at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:1686) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:165) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:736) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:681) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:621) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Caused by: java.lang.IndexOutOfBoundsException: toIndex = 6 at java.util.ArrayList.subListRangeCheck(ArrayList.java:1004) at java.util.ArrayList.subList(ArrayList.java:996) at org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.getSchemaOnRead(RecordReaderFactory.java:161) at org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.createTreeReader(RecordReaderFactory.java:66) at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:202) at org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:541) at 
org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$ReaderPair.<init>(OrcRawRecordMerger.java:183) at org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$OriginalReaderPair.<init>(OrcRawRecordMerger.java:226) at org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.<init>(OrcRawRecordMerger.java:437) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getReader(OrcInputFormat.java:1216) at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:1113) at org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:673) at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:323) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:445) ... 15 more {noformat} This error appears to be related to the patch of HIVE-10591. Steps to reproduce (Hive QL): {noformat} DROP TABLE IF EXISTS orc_drop_column; CREATE TABLE orc_drop_column (`id` int, `name` string, `description` string, `somevalue` double, `someflag` boolean, `somedate` timestamp) STORED AS ORC; INSERT INTO TABLE orc_drop_column select * from (select 1, 'my_name', 'my_desc', 5.5, true, '2016-08-25 06:00:00') a; ALTER TABLE orc_drop_column SET SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; ALTER TABLE orc_drop_column REPLACE COLUMNS ( `id` int, `name` string ); ALTER TABLE orc_drop_column SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; SELECT id, name FROM orc_drop_column; {noformat} > Select fails when ORC file has more columns than table schema > ------------------------------------------------------------- > > Key: HIVE-14650 > URL: https://issues.apache.org/jira/browse/HIVE-14650 > Project: Hive > Issue Type: Bug > Affects Versions: 1.2.1 > Reporter: Jeff Mink > Priority: Minor > > When SELECTing from a Hive ORC table, the following IndexOutOfBoundsException > is thrown if the underlying ORC file has at least 4 more columns than the Hive > schema (where N is the number of columns 
in the ORC file). > {noformat} > Failed with exception > java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N > 16/08/25 15:22:19 ERROR CliDriver: Failed with exception > java.io.IOException:java.lang.IndexOutOfBoundsException: toIndex = N > java.io.IOException: java.lang.IndexOutOfBoundsException: toIndex = N > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:507) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:414) > at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:140) > at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:1686) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:165) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:736) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:681) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:621) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > Caused by: java.lang.IndexOutOfBoundsException: toIndex = 6 > at java.util.ArrayList.subListRangeCheck(ArrayList.java:1004) > at java.util.ArrayList.subList(ArrayList.java:996) > at > org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.getSchemaOnRead(RecordReaderFactory.java:161) > at > org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory.createTreeReader(RecordReaderFactory.java:66) > at > org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:202) > at > 
org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:541) > at > org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$ReaderPair.<init>(OrcRawRecordMerger.java:183) > at > org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger$OriginalReaderPair.<init>(OrcRawRecordMerger.java:226) > at > org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.<init>(OrcRawRecordMerger.java:437) > at > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getReader(OrcInputFormat.java:1216) > at > org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:1113) > at > org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:673) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:323) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:445) > ... 15 more > {noformat} > This error appears to be related to the patch of HIVE-10591. > Steps to reproduce (Hive QL): > {noformat} > DROP TABLE IF EXISTS orc_drop_column; > CREATE TABLE orc_drop_column (`id` int, `name` string, `description` string, > `somevalue` double, `someflag` boolean, `somedate` timestamp) STORED AS ORC; > INSERT INTO TABLE orc_drop_column select * from (select 1, 'my_name', > 'my_desc', 5.5, true, '2016-08-25 06:00:00') a; > ALTER TABLE orc_drop_column SET SERDE > 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; > ALTER TABLE orc_drop_column REPLACE COLUMNS ( > `id` int, > `name` string > ); > ALTER TABLE orc_drop_column SET SERDE > 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; > SELECT id, name FROM orc_drop_column; > {noformat} > -- This message was sent by Atlassian JIRA (v7.6.3#76005)