[ https://issues.apache.org/jira/browse/HIVE-26612?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17618928#comment-17618928 ]
Stamatis Zampetakis commented on HIVE-26612: -------------------------------------------- [~scarlin] I reverted HIVE-23345 locally and tried to run the test case you have in the PR but it seems that there are still errors at a different level: {noformat} java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.io.TimestampWritableV2 cannot be cast to org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:213) at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:98) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:212) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:154) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:149) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:185) at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:228) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255) at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:200) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:126) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:421) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:352) at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:727) at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:697) at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:114) at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157) at org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:498) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135) at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) at org.junit.runners.ParentRunner.run(ParentRunner.java:413) at org.junit.runners.Suite.runChild(Suite.java:128) at org.junit.runners.Suite.runChild(Suite.java:27) at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95) at org.junit.rules.RunRules.evaluate(RunRules.java:20) at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) at org.junit.runners.ParentRunner.run(ParentRunner.java:413) at 
org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365) at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273) at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238) at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159) at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377) at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138) at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465) at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.io.TimestampWritableV2 cannot be cast to org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.ql.exec.ListSinkOperator.process(ListSinkOperator.java:98) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888) at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:541) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:533) at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:197) ... 
55 more Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.io.TimestampWritableV2 cannot be cast to org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector.get(WritableLongObjectInspector.java:36) at org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:258) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:308) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:263) at org.apache.hadoop.hive.serde2.DelimitedJSONSerDe.serializeField(DelimitedJSONSerDe.java:72) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:247) at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:53) at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:67) at org.apache.hadoop.hive.serde2.DefaultFetchFormatter.convert(DefaultFetchFormatter.java:36) at org.apache.hadoop.hive.ql.exec.ListSinkOperator.process(ListSinkOperator.java:94) ... 62 more {noformat} Because of this, I am skeptical about whether HIVE-23345 broke this functionality. Did this ever work? 
> Hive cannot read parquet files with int64 (TIMESTAMP_MILLIS) > ------------------------------------------------------------ > > Key: HIVE-26612 > URL: https://issues.apache.org/jira/browse/HIVE-26612 > Project: Hive > Issue Type: Bug > Components: Database/Schema > Reporter: Steve Carlin > Priority: Major > Labels: pull-request-available > Time Spent: 0.5h > Remaining Estimate: 0h > > If a parquet file has a Type of "int64 eventtime (TIMESTAMP(MILLIS,true))", > the following error is produced: > {noformat} > java.lang.RuntimeException: java.io.IOException: > org.apache.parquet.io.ParquetDecodingException: Can not read value at 1 in > block 0 in file > file:/xxxx/hive/itests/qtest/target/tmp/parquet_format_ts_as_bigint/part-00000/timestamp_as_bigint.parquet > at > org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:213) > at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:98) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:212) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:154) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:149) > Caused by: java.io.IOException: > org.apache.parquet.io.ParquetDecodingException: Can not read value at 1 in > block 0 in file > file:/xxxx/hive/itests/qtest/target/tmp/parquet_format_ts_as_bigint/part-00000/timestamp_as_bigint.parquet > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:624) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:531) > at > org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:197) > ... 
55 more > Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value > at 1 in block 0 in file > file:/home/stamatis/Projects/Apache/hive/itests/qtest/target/tmp/parquet_format_ts_as_bigint/part-00000/timestamp_as_bigint.parquet > at > org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:255) > at > org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:207) > at > org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:87) > at > org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:89) > at > org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:771) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:335) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:562) > ... 57 more > Caused by: java.lang.UnsupportedOperationException: > org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter$10$1 > at > org.apache.parquet.io.api.PrimitiveConverter.addLong(PrimitiveConverter.java:105) > at > org.apache.parquet.column.impl.ColumnReaderBase$2$4.writeValue(ColumnReaderBase.java:301) > at > org.apache.parquet.column.impl.ColumnReaderBase.writeCurrentValueToConverter(ColumnReaderBase.java:410) > at > org.apache.parquet.column.impl.ColumnReaderImpl.writeCurrentValueToConverter(ColumnReaderImpl.java:30) > at > org.apache.parquet.io.RecordReaderImplementation.read(RecordReaderImplementation.java:406) > at > org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:230) > ... 
63 more > {noformat} > The parquet file can be created with the following steps (through spark): > spark.conf.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MILLIS") > spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "LEGACY") > spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "LEGACY") > spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInRead", "LEGACY") > spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInRead", "LEGACY") > [1] > val df = Seq( > (1, Timestamp.valueOf("2014-01-01 23:00:01")), > (1, Timestamp.valueOf("2014-11-30 12:40:32")), > (2, Timestamp.valueOf("2016-12-29 09:54:00")), > (2, Timestamp.valueOf("2016-05-09 10:12:43")) > ).toDF("typeid","eventtime") > [2] > [root@c4839-node3 test_parquet2]# parquet-tools schema > part-00001-6c90b794-90b9-4cc0-afc5-2e49a4e96bad-c000.snappy.parquet > message spark_schema { > required int32 typeid; > optional int64 eventtime (TIMESTAMP(MILLIS,true)); > } > [3] > [root@c4839-node3 test_parquet1]# parquet-tools schema > part-00001-cb1aeebb-ec87-4273-82ec-911c4fb605b6-c000.snappy.parquet > message spark_schema { > required int32 typeid; > optional int96 eventtime; > } -- This message was sent by Atlassian Jira (v8.20.10#820010)