Copilot commented on code in PR #2339:
URL: https://github.com/apache/sedona/pull/2339#discussion_r2328553067


##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -18,22 +18,50 @@
  */
 package org.apache.sedona.sql.datasources.geopackage.transform
 
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
 import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
 import java.time.temporal.ChronoUnit
 
 object DataTypesTransformations {
-  def getDays(dateString: String): Int = {
-    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
-
-    val date = LocalDate.parse(dateString, formatter)
+  // Pre-created formatters to avoid repeated object creation
+  private val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
+  private val datetimeFormatters = Array(
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"), // 3 digits
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SS"), // 2 digits
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.S"), // 1 digit
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss") // no milliseconds
+  )
 
+  def getDays(dateString: String): Int = {
+    val date = LocalDate.parse(dateString, dateFormatter)
     val epochDate = LocalDate.of(1970, 1, 1)
-
     ChronoUnit.DAYS.between(epochDate, date).toInt
   }
 
   def epoch(timestampStr: String): Long = {
-    Instant.parse(timestampStr).toEpochMilli
+    try {
+      // Try parsing as-is first (works for timestamps with timezone info)
+      Instant.parse(timestampStr).toEpochMilli
+    } catch {
+      case _: DateTimeParseException =>
+        // If parsing fails, try treating it as UTC (common case for 
GeoPackage)
+        // Handle various datetime formats without timezone info
+        // Try different patterns to handle various millisecond formats
+        for (formatter <- datetimeFormatters) {
+          try {
+            val localDateTime = LocalDateTime.parse(timestampStr, formatter)
+            return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+          } catch {
+            case _: DateTimeParseException =>
+            // Continue to next formatter
+          }
+        }
+
+        // If all formatters failed, throw a descriptive exception
+        throw new IllegalArgumentException(
+          s"Unable to parse datetime: $timestampStr. " +
+            s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or 
'yyyy-MM-ddTHH:mm:ss[.S]Z'")

Review Comment:
   [nitpick] Using early return in a for loop is not optimal for functional 
programming. Consider using `datetimeFormatters.view.map(...)` with `find` or 
`collectFirst` to make the code more functional and potentially more efficient.
   ```suggestion
           datetimeFormatters.view
             .map { formatter =>
               try {
                 Some(LocalDateTime.parse(timestampStr, 
formatter).toInstant(ZoneOffset.UTC).toEpochMilli)
               } catch {
                 case _: DateTimeParseException => None
               }
             }
             .collectFirst { case Some(epochMilli) => epochMilli }
             .getOrElse {
               // If all formatters failed, throw a descriptive exception
               throw new IllegalArgumentException(
                 s"Unable to parse datetime: $timestampStr. " +
                   s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or 
'yyyy-MM-ddTHH:mm:ss[.S]Z'")
             }
   ```



##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -18,22 +18,50 @@
  */
 package org.apache.sedona.sql.datasources.geopackage.transform
 
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
 import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
 import java.time.temporal.ChronoUnit
 
 object DataTypesTransformations {
-  def getDays(dateString: String): Int = {
-    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
-
-    val date = LocalDate.parse(dateString, formatter)
+  // Pre-created formatters to avoid repeated object creation
+  private val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
+  private val datetimeFormatters = Array(
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"), // 3 digits
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SS"), // 2 digits
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.S"), // 1 digit
+    DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss") // no milliseconds
+  )
 
+  def getDays(dateString: String): Int = {
+    val date = LocalDate.parse(dateString, dateFormatter)
     val epochDate = LocalDate.of(1970, 1, 1)
-
     ChronoUnit.DAYS.between(epochDate, date).toInt
   }
 
   def epoch(timestampStr: String): Long = {
-    Instant.parse(timestampStr).toEpochMilli
+    try {
+      // Try parsing as-is first (works for timestamps with timezone info)
+      Instant.parse(timestampStr).toEpochMilli
+    } catch {
+      case _: DateTimeParseException =>
+        // If parsing fails, try treating it as UTC (common case for 
GeoPackage)
+        // Handle various datetime formats without timezone info
+        // Try different patterns to handle various millisecond formats
+        for (formatter <- datetimeFormatters) {
+          try {
+            val localDateTime = LocalDateTime.parse(timestampStr, formatter)
+            return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+          } catch {
+            case _: DateTimeParseException =>
+            // Continue to next formatter
+          }
+        }
+
+        // If all formatters failed, throw a descriptive exception
+        throw new IllegalArgumentException(
+          s"Unable to parse datetime: $timestampStr. " +
+            s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or 
'yyyy-MM-ddTHH:mm:ss[.S]Z'")

Review Comment:
   The error message format examples don't accurately reflect all supported 
patterns. It shows '[.S]' but the code supports .S, .SS, and .SSS formats. 
Consider updating to: 'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]' or 
'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]Z'
   ```suggestion
               s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]' or 
'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]Z'")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to