Copilot commented on code in PR #2339:
URL: https://github.com/apache/sedona/pull/2339#discussion_r2328553067
##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -18,22 +18,50 @@
*/
package org.apache.sedona.sql.datasources.geopackage.transform
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
import java.time.temporal.ChronoUnit
object DataTypesTransformations {
- def getDays(dateString: String): Int = {
- val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
-
- val date = LocalDate.parse(dateString, formatter)
+ // Pre-created formatters to avoid repeated object creation
+ private val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
+ private val datetimeFormatters = Array(
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"), // 3 digits
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SS"), // 2 digits
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.S"), // 1 digit
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss") // no milliseconds
+ )
+ def getDays(dateString: String): Int = {
+ val date = LocalDate.parse(dateString, dateFormatter)
val epochDate = LocalDate.of(1970, 1, 1)
-
ChronoUnit.DAYS.between(epochDate, date).toInt
}
def epoch(timestampStr: String): Long = {
- Instant.parse(timestampStr).toEpochMilli
+ try {
+ // Try parsing as-is first (works for timestamps with timezone info)
+ Instant.parse(timestampStr).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // If parsing fails, try treating it as UTC (common case for
GeoPackage)
+ // Handle various datetime formats without timezone info
+ // Try different patterns to handle various millisecond formats
+ for (formatter <- datetimeFormatters) {
+ try {
+ val localDateTime = LocalDateTime.parse(timestampStr, formatter)
+ return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // Continue to next formatter
+ }
+ }
+
+ // If all formatters failed, throw a descriptive exception
+ throw new IllegalArgumentException(
+ s"Unable to parse datetime: $timestampStr. " +
+ s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or
'yyyy-MM-ddTHH:mm:ss[.S]Z'")
Review Comment:
[nitpick] Using early return in a for loop is not optimal for functional
programming. Consider using `datetimeFormatters.view.map(...)` with `find` or
`collectFirst` to make the code more functional and potentially more efficient.
```suggestion
datetimeFormatters.view
.map { formatter =>
try {
Some(LocalDateTime.parse(timestampStr,
formatter).toInstant(ZoneOffset.UTC).toEpochMilli)
} catch {
case _: DateTimeParseException => None
}
}
.collectFirst { case Some(epochMilli) => epochMilli }
.getOrElse {
// If all formatters failed, throw a descriptive exception
throw new IllegalArgumentException(
s"Unable to parse datetime: $timestampStr. " +
s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or
'yyyy-MM-ddTHH:mm:ss[.S]Z'")
}
```
##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -18,22 +18,50 @@
*/
package org.apache.sedona.sql.datasources.geopackage.transform
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneOffset}
import java.time.format.DateTimeFormatter
+import java.time.format.DateTimeParseException
import java.time.temporal.ChronoUnit
object DataTypesTransformations {
- def getDays(dateString: String): Int = {
- val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
-
- val date = LocalDate.parse(dateString, formatter)
+ // Pre-created formatters to avoid repeated object creation
+ private val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
+ private val datetimeFormatters = Array(
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"), // 3 digits
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SS"), // 2 digits
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.S"), // 1 digit
+ DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss") // no milliseconds
+ )
+ def getDays(dateString: String): Int = {
+ val date = LocalDate.parse(dateString, dateFormatter)
val epochDate = LocalDate.of(1970, 1, 1)
-
ChronoUnit.DAYS.between(epochDate, date).toInt
}
def epoch(timestampStr: String): Long = {
- Instant.parse(timestampStr).toEpochMilli
+ try {
+ // Try parsing as-is first (works for timestamps with timezone info)
+ Instant.parse(timestampStr).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // If parsing fails, try treating it as UTC (common case for
GeoPackage)
+ // Handle various datetime formats without timezone info
+ // Try different patterns to handle various millisecond formats
+ for (formatter <- datetimeFormatters) {
+ try {
+ val localDateTime = LocalDateTime.parse(timestampStr, formatter)
+ return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // Continue to next formatter
+ }
+ }
+
+ // If all formatters failed, throw a descriptive exception
+ throw new IllegalArgumentException(
+ s"Unable to parse datetime: $timestampStr. " +
+ s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.S]' or
'yyyy-MM-ddTHH:mm:ss[.S]Z'")
Review Comment:
The error message format examples don't accurately reflect all supported
patterns. It shows '[.S]' but the code supports .S, .SS, and .SSS formats.
Consider updating to: 'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]' or
'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]Z'
```suggestion
s"Expected formats: 'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]' or
'yyyy-MM-ddTHH:mm:ss[.SSS|.SS|.S]Z'")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]