[ https://issues.apache.org/jira/browse/HIVE-28249?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Simhadri Govindappa updated HIVE-28249:
---------------------------------------
    Description: 

When handling legacy time zone conversions in Parquet, 'February 29' of year '200' is an edge case. According to [https://www.lanl.gov/Caesar/node202.html], the Julian day number of 200 CE/02/29 in the Julian calendar differs from the Julian day number in the Gregorian calendar:

|Date (BC/AD)|Date (CE)|Julian Day (Julian Calendar)|Julian Day (Gregorian Calendar)|
|200 AD/02/28|200 CE/02/28|1794166|1794167|
|200 AD/02/29|200 CE/02/29|1794167|1794168|
|200 AD/03/01|200 CE/03/01|1794168|1794168|

As a result, since Hive stores timestamps in UTC, converting 200 CE/03/01 between time zones can shift the UTC instant back onto Julian day 1794167, which the legacy (Julian-calendar) conversion reads as 29 February 200. Hive then fails with a "not a leap year" exception, even though the record actually inserted was 200 CE/03/01 in the Asia/Singapore time zone.
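For illustration, here is a minimal, self-contained sketch (illustrative only, not Hive code; the class name and the {{jdnJulian}}/{{jdnGregorian}} helpers are ad hoc, using the standard Julian/Gregorian day-number arithmetic, and the Asia/Singapore offset is whatever LMT rule the local tzdb applies to such ancient dates). It reproduces the day numbers in the table above, shows how 200 CE/03/01 in Asia/Singapore lands on the problematic UTC day, and trips the same java.time leap-year check that surfaces in the stack trace below:

{code:java}
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;

public class JulianDayEdgeCase {

    // Julian day number of a Julian-calendar date (standard integer arithmetic).
    static long jdnJulian(int year, int month, int day) {
        int a = (14 - month) / 12;
        long y = year + 4800L - a;
        long m = month + 12L * a - 3;
        return day + (153 * m + 2) / 5 + 365 * y + y / 4 - 32083;
    }

    // Julian day number of a proleptic Gregorian-calendar date.
    static long jdnGregorian(int year, int month, int day) {
        int a = (14 - month) / 12;
        long y = year + 4800L - a;
        long m = month + 12L * a - 3;
        return day + (153 * m + 2) / 5 + 365 * y + y / 4 - y / 100 + y / 400 - 32045;
    }

    public static void main(String[] args) {
        // 1. Reproduce the table: around 200 CE the two calendars are one day apart.
        System.out.printf("02/28: Julian JDN=%d, Gregorian JDN=%d%n",
                jdnJulian(200, 2, 28), jdnGregorian(200, 2, 28)); // 1794166, 1794167
        System.out.printf("02/29: Julian JDN=%d%n",
                jdnJulian(200, 2, 29));                           // 1794167
        System.out.printf("03/01: Julian JDN=%d, Gregorian JDN=%d%n",
                jdnJulian(200, 3, 1), jdnGregorian(200, 3, 1));   // 1794168, 1794168

        // 2. 200 CE/03/01 00:00 in Asia/Singapore converted to UTC falls on the
        //    previous (Gregorian) day, 200 CE/02/28 = day 1794167 -- the day the
        //    Julian calendar calls 29 February 200.
        ZonedDateTime sgt = ZonedDateTime.of(LocalDateTime.of(200, 3, 1, 0, 0),
                ZoneId.of("Asia/Singapore"));
        System.out.println(sgt.withZoneSameInstant(ZoneOffset.UTC));

        // 3. java.time uses the proleptic Gregorian calendar, where year 200
        //    (divisible by 100 but not by 400) is not a leap year. This is the
        //    same check that fails inside jodd's JulianDate.toLocalDateTime()
        //    when the legacy conversion hands it Julian day 1794167 (see trace).
        try {
            LocalDate.of(200, 2, 29);
        } catch (DateTimeException e) {
            System.out.println(e.getMessage());
            // Invalid date 'February 29' as '200' is not a leap year
        }
    }
}
{code}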
Full stack trace:
{noformat}
java.lang.RuntimeException: java.io.IOException: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file file:/Users/simhadri.govindappa/Documents/apache/hive/itests/qtest/target/localfs/warehouse/test_sgt/sgt000
    at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:210)
    at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:95)
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:212)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:154)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:149)
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:185)
    at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:230)
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:257)
    at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:201)
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:127)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:425)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:356)
    at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:732)
    at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:702)
    at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:116)
    at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
    at org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
    at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
    at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
    at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
    at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
    at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
    at org.junit.runners.Suite.runChild(Suite.java:128)
    at org.junit.runners.Suite.runChild(Suite.java:27)
    at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
    at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
    at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
    at org.junit.rules.RunRules.evaluate(RunRules.java:20)
    at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
    at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
    at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
    at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
    at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
    at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
    at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
    at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
    at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
Caused by: java.io.IOException: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file file:/Users/simhadri.govindappa/Documents/apache/hive/itests/qtest/target/localfs/warehouse/test_sgt/sgt000
    at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:628)
    at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:535)
    at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:194)
    ... 55 more
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file file:/Users/simhadri.govindappa/Documents/apache/hive/itests/qtest/target/localfs/warehouse/test_sgt/sgt000
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:264)
    at org.apache.parquet.hadoop.ParquetRecordReader.nextKeyValue(ParquetRecordReader.java:210)
    at org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:84)
    at org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:89)
    at org.apache.hadoop.hive.ql.exec.FetchOperator$FetchInputFormatSplit.getRecordReader(FetchOperator.java:775)
    at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:339)
    at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:566)
    ... 57 more
Caused by: java.time.DateTimeException: Invalid date 'February 29' as '200' is not a leap year
    at java.time.LocalDate.create(LocalDate.java:429)
    at java.time.LocalDate.of(LocalDate.java:269)
    at java.time.LocalDateTime.of(LocalDateTime.java:361)
    at jodd.time.JulianDate.toLocalDateTime(JulianDate.java:344)
    at org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils.getTimestamp(NanoTimeUtils.java:111)
    at org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter$9$2.convert(ETypeConverter.java:782)
    at org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter$9$2.convert(ETypeConverter.java:764)
    at org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter$BinaryConverter.setDictionary(ETypeConverter.java:975)
    at org.apache.parquet.column.impl.ColumnReaderBase.<init>(ColumnReaderBase.java:415)
    at org.apache.parquet.column.impl.ColumnReaderImpl.<init>(ColumnReaderImpl.java:46)
    at org.apache.parquet.column.impl.ColumnReadStoreImpl.getColumnReader(ColumnReadStoreImpl.java:82)
    at org.apache.parquet.io.RecordReaderImplementation.<init>(RecordReaderImplementation.java:271)
    at org.apache.parquet.io.MessageColumnIO$1.visit(MessageColumnIO.java:147)
    at org.apache.parquet.io.MessageColumnIO$1.visit(MessageColumnIO.java:109)
    at org.apache.parquet.filter2.compat.FilterCompat$NoOpFilter.accept(FilterCompat.java:177)
    at org.apache.parquet.io.MessageColumnIO.getRecordReader(MessageColumnIO.java:109)
    at org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:141)
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:230)
    ... 63 more
{noformat}


> Parquet legacy timezone conversion converts march 1st to 29th feb and fails
> with not a leap year exception
> ----------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-28249
>                 URL: https://issues.apache.org/jira/browse/HIVE-28249
>             Project: Hive
>          Issue Type: Task
>            Reporter: Simhadri Govindappa
>            Assignee: Simhadri Govindappa
>            Priority: Major
>              Labels: pull-request-available
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)