Copilot commented on code in PR #2339:
URL: https://github.com/apache/sedona/pull/2339#discussion_r2328543804
##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -34,6 +35,48 @@ object DataTypesTransformations {
}
def epoch(timestampStr: String): Long = {
- Instant.parse(timestampStr).toEpochMilli
+ try {
+ // Try parsing as-is first (works for timestamps with timezone info)
+ Instant.parse(timestampStr).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // If parsing fails, try treating it as UTC (common case for
GeoPackage)
+ try {
+ // Handle various datetime formats without timezone info
+ // Try different patterns to handle various millisecond formats
+ val patterns = Array(
+ "yyyy-MM-dd'T'HH:mm:ss.SSS", // 3 digits
+ "yyyy-MM-dd'T'HH:mm:ss.SS", // 2 digits
+ "yyyy-MM-dd'T'HH:mm:ss.S", // 1 digit
+ "yyyy-MM-dd'T'HH:mm:ss" // no milliseconds
+ )
+
+ var localDateTime: LocalDateTime = null
+ var lastException: DateTimeParseException = null
+
+ for (pattern <- patterns) {
+ try {
+ val formatter = DateTimeFormatter.ofPattern(pattern)
+ localDateTime = LocalDateTime.parse(timestampStr, formatter)
+ lastException = null
+ } catch {
+ case e: DateTimeParseException =>
+ lastException = e
+ }
+ }
+
+ if (localDateTime != null) {
+ localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+ } else {
+ throw lastException
+ }
Review Comment:
The logic could be simplified by using early returns instead of null checks.
Consider breaking out of the loop immediately when a successful parse occurs,
eliminating the need for null checks.
```suggestion
var lastException: DateTimeParseException = null
for (pattern <- patterns) {
try {
val formatter = DateTimeFormatter.ofPattern(pattern)
val localDateTime = LocalDateTime.parse(timestampStr,
formatter)
return localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
} catch {
case e: DateTimeParseException =>
lastException = e
}
}
throw lastException
```
##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -34,6 +35,48 @@ object DataTypesTransformations {
}
def epoch(timestampStr: String): Long = {
- Instant.parse(timestampStr).toEpochMilli
+ try {
+ // Try parsing as-is first (works for timestamps with timezone info)
+ Instant.parse(timestampStr).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // If parsing fails, try treating it as UTC (common case for
GeoPackage)
+ try {
+ // Handle various datetime formats without timezone info
+ // Try different patterns to handle various millisecond formats
+ val patterns = Array(
+ "yyyy-MM-dd'T'HH:mm:ss.SSS", // 3 digits
+ "yyyy-MM-dd'T'HH:mm:ss.SS", // 2 digits
+ "yyyy-MM-dd'T'HH:mm:ss.S", // 1 digit
+ "yyyy-MM-dd'T'HH:mm:ss" // no milliseconds
+ )
+
+ var localDateTime: LocalDateTime = null
+ var lastException: DateTimeParseException = null
+
+ for (pattern <- patterns) {
+ try {
+ val formatter = DateTimeFormatter.ofPattern(pattern)
+ localDateTime = LocalDateTime.parse(timestampStr, formatter)
+ lastException = null
+ } catch {
+ case e: DateTimeParseException =>
+ lastException = e
+ }
+ }
Review Comment:
Creating a new `DateTimeFormatter` instance for each pattern in every
iteration is inefficient. Consider pre-creating the formatters as static values
or using a lazy initialization pattern to avoid repeated object creation.
##########
spark/common/src/main/scala/org/apache/sedona/sql/datasources/geopackage/transform/DataTypesTransformations.scala:
##########
@@ -34,6 +35,48 @@ object DataTypesTransformations {
}
def epoch(timestampStr: String): Long = {
- Instant.parse(timestampStr).toEpochMilli
+ try {
+ // Try parsing as-is first (works for timestamps with timezone info)
+ Instant.parse(timestampStr).toEpochMilli
+ } catch {
+ case _: DateTimeParseException =>
+ // If parsing fails, try treating it as UTC (common case for
GeoPackage)
+ try {
+ // Handle various datetime formats without timezone info
+ // Try different patterns to handle various millisecond formats
+ val patterns = Array(
+ "yyyy-MM-dd'T'HH:mm:ss.SSS", // 3 digits
+ "yyyy-MM-dd'T'HH:mm:ss.SS", // 2 digits
+ "yyyy-MM-dd'T'HH:mm:ss.S", // 1 digit
+ "yyyy-MM-dd'T'HH:mm:ss" // no milliseconds
+ )
+
+ var localDateTime: LocalDateTime = null
+ var lastException: DateTimeParseException = null
+
+ for (pattern <- patterns) {
+ try {
+ val formatter = DateTimeFormatter.ofPattern(pattern)
+ localDateTime = LocalDateTime.parse(timestampStr, formatter)
+ lastException = null
+ } catch {
+ case e: DateTimeParseException =>
+ lastException = e
+ }
+ }
+
+ if (localDateTime != null) {
+ localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
+ } else {
+ throw lastException
Review Comment:
Using `null` initialization is not idiomatic Scala. Consider using
`Option[LocalDateTime]` and `Option[DateTimeParseException]` instead to follow
functional programming best practices and avoid null pointer exceptions.
```suggestion
var localDateTimeOpt: Option[LocalDateTime] = None
var lastExceptionOpt: Option[DateTimeParseException] = None
for (pattern <- patterns) {
try {
val formatter = DateTimeFormatter.ofPattern(pattern)
localDateTimeOpt = Some(LocalDateTime.parse(timestampStr,
formatter))
lastExceptionOpt = None
} catch {
case e: DateTimeParseException =>
lastExceptionOpt = Some(e)
}
}
localDateTimeOpt match {
case Some(localDateTime) =>
localDateTime.toInstant(ZoneOffset.UTC).toEpochMilli
case None =>
throw lastExceptionOpt.getOrElse(new
DateTimeParseException("Unknown parse error", timestampStr, 0))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]