This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch geopackage-bug
in repository https://gitbox.apache.org/repos/asf/sedona.git

commit a6362240e29899d57188f456be47989f2b113eda
Author: Jia Yu <[email protected]>
AuthorDate: Sat Sep 6 23:43:48 2025 -0700

    Fix the DateTimeParseException thrown when reading GeoPackage datetime values that lack timezone information
---
 .../transform/DataTypesTransformations.scala       | 47 +++++++++++++++++++-
 .../apache/sedona/sql/GeoPackageReaderTest.scala   | 51 ++++++++++++++++++++--
 .../apache/sedona/sql/GeoPackageReaderTest.scala   | 44 +++++++++++++++++++
 .../apache/sedona/sql/GeoPackageReaderTest.scala   | 44 +++++++++++++++++++
 4 files changed, 181 insertions(+), 5 deletions(-)

diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala
index 9a23f0a088..2207194157 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala
@@ -18,8 +18,9 @@
  */
 package org.apache.sedona.sql.datasources.geopackage.transform
 
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
 import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
 import java.time.temporal.ChronoUnit
 
 object DataTypesTransformations {
@@ -34,6 +35,48 @@ object DataTypesTransformations {
   }
 
   def epoch(timestampStr: String): Long = {
-    Instant.parse(timestampStr).toEpochMilli
+    try {
+      // Try parsing as-is first (works for timestamps with timezone info)
+      Instant.parse(timestampStr).toEpochMilli
+    } catch {
+      case _: DateTimeParseException =>
+        // If parsing fails, try treating it as UTC (common case for GeoPackage)
+        try {
+          // Handle various datetime formats without timezone info
+          // Try different patterns to handle various millisecond formats
+          val patterns = Array(
+            "yyyy-MM-dd'T'HH:mm:ss.SSS", // 3 digits
+            "yyyy-MM-dd'T'HH:mm:ss.SS", // 2 digits
+            "yyyy-MM-dd'T'HH:mm:ss.S", // 1 digit
+            "yyyy-MM-dd'T'HH:mm:ss" // no milliseconds
+          )
+
+          var localDateTime: LocalDateTime = null
+          var lastException: DateTimeParseException = null
+
+          for (pattern <- patterns) {
+            try {
+              val formatter = DateTimeFormatter.ofPattern(pattern)
+              localDateTime = LocalDateTime.parse(timestampStr, formatter)
+              lastException = null
+            } catch {
+              case e: DateTimeParseException =>
+                lastException = e
+            }
+          }
+
+          if (localDateTime != null) {
+            localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+          } else {
+            throw lastException
+          }
+        } catch {
+          case e: DateTimeParseException =>
+            throw new IllegalArgumentException(
+              s"Unable to parse datetime: $timestampStr. " +
+                s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or 
'yyyy-MM-ddTHH:mm:ss[.S]Z'",
+              e)
+        }
+    }
   }
 }
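
For context, the new fallback can be exercised in isolation. A minimal self-contained sketch (the sample timestamps are illustrative, and DateTimeFormatter.ISO_LOCAL_DATE_TIME stands in here for the explicit pattern list above, which it subsumes since it accepts zero or more fractional-second digits):

    import java.time.{Instant, LocalDateTime, ZoneOffset}
    import java.time.format.{DateTimeFormatter, DateTimeParseException}

    // Same two-step strategy as epoch(): Instant.parse handles values that
    // carry an offset; values without one are parsed as a local datetime
    // and interpreted as UTC.
    def epochSketch(ts: String): Long =
      try Instant.parse(ts).toEpochMilli
      catch {
        case _: DateTimeParseException =>
          LocalDateTime
            .parse(ts, DateTimeFormatter.ISO_LOCAL_DATE_TIME)
            .toInstant(ZoneOffset.UTC)
            .toEpochMilli
      }

    epochSketch("2025-09-06T23:43:48Z")    // offset present: Instant.parse path
    epochSketch("2025-09-06T23:43:48")     // no offset: interpreted as UTC
    epochSketch("2025-09-06T23:43:48.123") // fractional seconds also accepted
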
diff --git a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
index ee9931cbf4..61b78e2c56 100644
--- a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
+++ b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
@@ -18,11 +18,11 @@
  */
 package org.apache.sedona.sql
 
-import io.minio.{MakeBucketArgs, MinioClient, PutObjectArgs}
-import org.apache.spark.sql.{DataFrame, SparkSession}
+import io.minio.{MakeBucketArgs, MinioClient}
+import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.functions.expr
 import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
-import org.apache.spark.sql.types.{BinaryType, BooleanType, DateType, DoubleType, IntegerType, StringType, StructField, StructType, TimestampType}
+import org.apache.spark.sql.types._
 import org.scalatest.matchers.should.Matchers
 import org.scalatest.prop.TableDrivenPropertyChecks._
 import org.testcontainers.containers.MinIOContainer
@@ -38,6 +38,7 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
   val path: String = resourceFolder + "geopackage/example.gpkg"
   val polygonsPath: String = resourceFolder + "geopackage/features.gpkg"
   val rasterPath: String = resourceFolder + "geopackage/raster.gpkg"
+  val datetimePath: String = resourceFolder + "geopackage/test_datetime_issue.gpkg"
   val wktReader = new org.locationtech.jts.io.WKTReader()
   val wktWriter = new org.locationtech.jts.io.WKTWriter()
 
@@ -168,6 +169,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
         df.count() shouldEqual expectedCount
       }
     }
+
+    it("should handle datetime fields without timezone information") {
+      // This test verifies the fix for DateTimeParseException when reading
+      // GeoPackage files with datetime fields that don't include timezone info
+      val testFilePath = resourceFolder + "geopackage/test_datetime_issue.gpkg"
+
+      // Test reading the test_features table with problematic datetime formats
+      val df = sparkSession.read
+        .format("geopackage")
+        .option("tableName", "test_features")
+        .load(testFilePath)
+
+      // The test should not throw DateTimeParseException when reading datetime fields
+      noException should be thrownBy {
+        df.select("created_at", "updated_at").collect()
+      }
+
+      // Verify that datetime fields are properly parsed as TimestampType
+      df.schema.fields.find(_.name == "created_at").get.dataType shouldEqual 
TimestampType
+      df.schema.fields.find(_.name == "updated_at").get.dataType shouldEqual 
TimestampType
+
+      // Verify that we can read the datetime values
+      val datetimeValues = df.select("created_at", "updated_at").collect()
+      datetimeValues should not be empty
+
+      // Verify that datetime values are valid timestamps
+      datetimeValues.foreach { row =>
+        val createdTimestamp = row.getAs[Timestamp]("created_at")
+        val updatedTimestamp = row.getAs[Timestamp]("updated_at")
+        createdTimestamp should not be null
+        updatedTimestamp should not be null
+        createdTimestamp.getTime should be > 0L
+        updatedTimestamp.getTime should be > 0L
+      }
+
+      // Test showMetadata option with the same file
+      noException should be thrownBy {
+        val metadataDf = sparkSession.read
+          .format("geopackage")
+          .option("showMetadata", "true")
+          .load(testFilePath)
+        metadataDf.select("last_change").collect()
+      }
+    }
   }
 
   describe("GeoPackage Raster Data Test") {
diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
index 9de19c3c48..6d9f41bf4e 100644
--- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
+++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
@@ -168,6 +168,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
         df.count() shouldEqual expectedCount
       }
     }
+
+    it("should handle datetime fields without timezone information") {
+      // This test verifies the fix for DateTimeParseException when reading
+      // GeoPackage files with datetime fields that don't include timezone info
+      val testFilePath = resourceFolder + "geopackage/test_datetime_issue.gpkg"
+
+      // Test reading the test_features table with problematic datetime formats
+      val df = sparkSession.read
+        .format("geopackage")
+        .option("tableName", "test_features")
+        .load(testFilePath)
+
+      // The test should not throw DateTimeParseException when reading datetime fields
+      noException should be thrownBy {
+        df.select("created_at", "updated_at").collect()
+      }
+
+      // Verify that datetime fields are properly parsed as TimestampType
+      df.schema.fields.find(_.name == "created_at").get.dataType shouldEqual 
TimestampType
+      df.schema.fields.find(_.name == "updated_at").get.dataType shouldEqual 
TimestampType
+
+      // Verify that we can read the datetime values
+      val datetimeValues = df.select("created_at", "updated_at").collect()
+      datetimeValues should not be empty
+
+      // Verify that datetime values are valid timestamps
+      datetimeValues.foreach { row =>
+        val createdTimestamp = row.getAs[Timestamp]("created_at")
+        val updatedTimestamp = row.getAs[Timestamp]("updated_at")
+        createdTimestamp should not be null
+        updatedTimestamp should not be null
+        createdTimestamp.getTime should be > 0L
+        updatedTimestamp.getTime should be > 0L
+      }
+
+      // Test showMetadata option with the same file
+      noException should be thrownBy {
+        val metadataDf = sparkSession.read
+          .format("geopackage")
+          .option("showMetadata", "true")
+          .load(testFilePath)
+        metadataDf.select("last_change").collect()
+      }
+    }
   }
 
   describe("GeoPackage Raster Data Test") {
diff --git a/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
index 9de19c3c48..6d9f41bf4e 100644
--- a/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
+++ b/spark/spark-4.0/src/test/scala/org/apache/sedona/sql/GeoPackageReaderTest.scala
@@ -168,6 +168,50 @@ class GeoPackageReaderTest extends TestBaseScala with Matchers {
         df.count() shouldEqual expectedCount
       }
     }
+
+    it("should handle datetime fields without timezone information") {
+      // This test verifies the fix for DateTimeParseException when reading
+      // GeoPackage files with datetime fields that don't include timezone info
+      val testFilePath = resourceFolder + "geopackage/test_datetime_issue.gpkg"
+
+      // Test reading the test_features table with problematic datetime formats
+      val df = sparkSession.read
+        .format("geopackage")
+        .option("tableName", "test_features")
+        .load(testFilePath)
+
+      // The test should not throw DateTimeParseException when reading datetime fields
+      noException should be thrownBy {
+        df.select("created_at", "updated_at").collect()
+      }
+
+      // Verify that datetime fields are properly parsed as TimestampType
+      df.schema.fields.find(_.name == "created_at").get.dataType shouldEqual 
TimestampType
+      df.schema.fields.find(_.name == "updated_at").get.dataType shouldEqual 
TimestampType
+
+      // Verify that we can read the datetime values
+      val datetimeValues = df.select("created_at", "updated_at").collect()
+      datetimeValues should not be empty
+
+      // Verify that datetime values are valid timestamps
+      datetimeValues.foreach { row =>
+        val createdTimestamp = row.getAs[Timestamp]("created_at")
+        val updatedTimestamp = row.getAs[Timestamp]("updated_at")
+        createdTimestamp should not be null
+        updatedTimestamp should not be null
+        createdTimestamp.getTime should be > 0L
+        updatedTimestamp.getTime should be > 0L
+      }
+
+      // Test showMetadata option with the same file
+      noException should be thrownBy {
+        val metadataDf = sparkSession.read
+          .format("geopackage")
+          .option("showMetadata", "true")
+          .load(testFilePath)
+        metadataDf.select("last_change").collect()
+      }
+    }
   }
 
   describe("GeoPackage Raster Data Test") {
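
With this fix in place, a table whose datetime columns omit timezone information reads end to end. A usage sketch mirroring the tests above (the local path is an assumption; any copy of test_datetime_issue.gpkg works):

    // Assumes a Sedona-enabled SparkSession named sparkSession, as in the tests.
    val df = sparkSession.read
      .format("geopackage")
      .option("tableName", "test_features")
      .load("/path/to/geopackage/test_datetime_issue.gpkg") // hypothetical path

    // created_at / updated_at are inferred as TimestampType and can be
    // collected without a DateTimeParseException.
    df.select("created_at", "updated_at").show()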
