MaxGekk commented on code in PR #50269:
URL: https://github.com/apache/spark/pull/50269#discussion_r2034758745


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala:
##########
@@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder {
     }
   }
 }
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and 
second fields. For invalid inputs it will throw an error.",
+  arguments = """
+    Arguments:
+      * hour - the hour to represent, from 0 to 23
+      * minute - the minute to represent, from 0 to 59
+      * second - the second to represent, from 0 to 59.999999
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(6, 30, 45.887);
+       06:30:45.887
+      > SELECT _FUNC_(NULL, 30, 0);
+       NULL
+  """,
+  group = "datetime_funcs",
+  since = "4.1.0")
+// scalastyle:on line.size.limit
+case class MakeTime(
+    hours: Expression,
+    minutes: Expression,
+    secsAndMicros: Expression)
+  extends RuntimeReplaceable
+    with ImplicitCastInputTypes
+    with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6))
+  override def children: Seq[Expression] = Seq(hours, minutes, secsAndMicros)
+  override def dataType: DataType = TimeType(TimeType.MAX_PRECISION)
+  override def prettyName: String = "make_time"
+
+  override def replacement: Expression = StaticInvoke(
+    classOf[DateTimeUtils.type],
+    dataType,

Review Comment:
   In the near future, the max precision may be changed to nanoseconds, and then you will have to fix all such places. Let's make the current implementation consistent and set the return data type to `TimeType(TimeType.MICROS_PRECISION)`.
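   
   A sketch of the suggested change (only the pinned constant; everything else stays as in the PR):
   ```scala
   // Pin the result type to microseconds explicitly, so a future bump of
   // TimeType.MAX_PRECISION to nanoseconds does not silently change make_time.
   override def dataType: DataType = TimeType(TimeType.MICROS_PRECISION)
   ```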



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala:
##########
@@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder {
     }
   }
 }
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and 
second fields. For invalid inputs it will throw an error.",
+  arguments = """
+    Arguments:
+      * hour - the hour to represent, from 0 to 23
+      * minute - the minute to represent, from 0 to 59
+      * second - the second to represent, from 0 to 59.999999
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(6, 30, 45.887);
+       06:30:45.887
+      > SELECT _FUNC_(NULL, 30, 0);
+       NULL
+  """,
+  group = "datetime_funcs",
+  since = "4.1.0")
+// scalastyle:on line.size.limit
+case class MakeTime(
+    hours: Expression,
+    minutes: Expression,
+    secsAndMicros: Expression)
+  extends RuntimeReplaceable
+    with ImplicitCastInputTypes
+    with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6))
+  override def children: Seq[Expression] = Seq(hours, minutes, secsAndMicros)
+  override def dataType: DataType = TimeType(TimeType.MAX_PRECISION)

Review Comment:
   When you extend `RuntimeReplaceable`, the `dataType()` method returns the `dataType` of `replacement`, see:
   ```scala
   trait RuntimeReplaceable extends Expression {
   ...
     override def dataType: DataType = replacement.dataType
   ```
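   
   So the explicit `dataType` override in `MakeTime` is redundant as long as the replacement already carries the right type. A sketch of relying on the trait instead (the hunk above is truncated after `dataType,`, so the remaining `StaticInvoke` arguments here are an assumption):
   ```scala
   // Drop the dataType override; RuntimeReplaceable derives it from here.
   override def replacement: Expression = StaticInvoke(
     classOf[DateTimeUtils.type],
     TimeType(TimeType.MICROS_PRECISION), // surfaced as dataType via replacement.dataType
     "timeToMicros",
     Seq(hours, minutes, secsAndMicros),
     inputTypes)
   ```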



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala:
##########
@@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder {
     }
   }
 }
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and 
second fields. For invalid inputs it will throw an error.",
+  arguments = """
+    Arguments:
+      * hour - the hour to represent, from 0 to 23
+      * minute - the minute to represent, from 0 to 59
+      * second - the second to represent, from 0 to 59.999999
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(6, 30, 45.887);
+       06:30:45.887
+      > SELECT _FUNC_(NULL, 30, 0);
+       NULL
+  """,
+  group = "datetime_funcs",
+  since = "4.1.0")
+// scalastyle:on line.size.limit
+case class MakeTime(
+    hours: Expression,
+    minutes: Expression,
+    secsAndMicros: Expression)
+  extends RuntimeReplaceable
+    with ImplicitCastInputTypes
+    with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6))

Review Comment:
   Could you add a comment from `MakeTimestamp` regarding the type `DecimalType(16, 6)`:
   ```
     // Accept `sec` as DecimalType to avoid losing precision of microseconds while converting
     // them to the fractional part of `sec`. Since `sec` also accepts IntegerType, and an integer
     // can be cast into decimal safely, we use DecimalType(16, 6), which is wider than DecimalType(10, 0).
   ```
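   
   For context, a sketch of why 16/6 is the right width (just the arithmetic, not from the PR):
   ```scala
   // Int.MaxValue = 2147483647 has 10 digits, so an IntegerType `sec` fits Decimal(10, 0);
   // 6 extra fractional digits for microseconds give precision 10 + 6 = 16 at scale 6.
   val intDigits = Int.MaxValue.toString.length // 10
   val precision = intDigits + 6                // 16 => DecimalType(16, 6)
   ```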



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala:
##########
@@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils {
      throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit)
     }
   }
+
+  /**
+   * Converts separate time fields to a long that represents microseconds since the start of
+   * the day.
+   * @param hours the hour, from 0 to 23
+   * @param minutes the minute, from 0 to 59
+   * @param secsAndMicros the second, from 0 to 59.999999
+   * @return Time time represented as microseconds since the start of the day
+   */
+  def timeToMicros(hours: Int, minutes: Int, secsAndMicros: Decimal): Long = {
+    assert(secsAndMicros.scale == 6,
+      s"Seconds fraction must have 6 digits for microseconds but got 
${secsAndMicros.scale}")
+    val unscaledSecFrac = secsAndMicros.toUnscaledLong
+    val totalMicros = unscaledSecFrac.toInt // 8 digits cannot overflow Int

Review Comment:
   Could you clarify why 8 digits, given that the input type is `DecimalType(16, 6)`, please?
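   
   Presumably the 8-digit bound only holds after range validation: a valid second in [0, 59.999999] has an unscaled value of at most 59999999 at scale 6, which fits in an `Int`, while an arbitrary `Decimal(16, 6)` can carry 16 digits and overflow it. A sketch of the arithmetic (not from the PR):
   ```scala
   val maxValidUnscaled   = 59999999L          // 59.999999 at scale 6: 8 digits
   val maxDecimalUnscaled = 9999999999999999L  // 9999999999.999999 at scale 6: 16 digits
   assert(maxValidUnscaled <= Int.MaxValue)    // .toInt is safe for valid seconds
   assert(maxDecimalUnscaled > Int.MaxValue)   // .toInt would silently wrap here
   ```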



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala:
##########
@@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils {
      throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit)
     }
   }
+
+  /**
+   * Converts separate time fields to a long that represents microseconds since the start of
+   * the day.
+   * @param hours the hour, from 0 to 23
+   * @param minutes the minute, from 0 to 59
+   * @param secsAndMicros the second, from 0 to 59.999999
+   * @return Time time represented as microseconds since the start of the day
+   */
+  def timeToMicros(hours: Int, minutes: Int, secsAndMicros: Decimal): Long = {
+    assert(secsAndMicros.scale == 6,
+      s"Seconds fraction must have 6 digits for microseconds but got 
${secsAndMicros.scale}")
+    val unscaledSecFrac = secsAndMicros.toUnscaledLong
+    val totalMicros = unscaledSecFrac.toInt // 8 digits cannot overflow Int
+    val fullSecs = Math.floorDiv(totalMicros, MICROS_PER_SECOND.toInt)
+    val nanos = Math.floorMod(totalMicros, MICROS_PER_SECOND.toInt) * NANOS_PER_MICROS.toInt

Review Comment:
   Why do you cast to `Int` explicitly? I guess ops over `Long` are as performant as over `Int` on modern CPUs. How about:
   ```scala
       try {
         val unscaledSecFrac = secsAndMicros.toUnscaledLong
         val fullSecs = Math.floorDiv(unscaledSecFrac, MICROS_PER_SECOND)
         val nanos = Math.floorMod(unscaledSecFrac, MICROS_PER_SECOND) * NANOS_PER_MICROS
         val lt = LocalTime.of(hours, minutes, fullSecs.toInt, nanos.toInt)
         localTimeToMicros(lt)
       } catch {
         case e @ (_: DateTimeException | _: ArithmeticException) =>
           throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRangeWithoutSuggestion(e)
       }
   ```
   
   Could you add tests for the function `timeToMicros` to `DateTimeUtilsSuite`, and check corner cases like the following (a sketch follows the list):
   - secsAndMicros = 9999999999.999999, the max value of the type DecimalType(16, 6)
   - secsAndMicros = -999999999.999999
   - secsAndMicros = 60.0
   - hours = -1 or 24
   - minutes = -1 or 60
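   
   A hedged sketch of such tests (the JIRA id, the thrown exception type, and the `Decimal` constructor usage are assumptions, not from the PR):
   ```scala
   test("SPARK-XXXXX: timeToMicros") {
     // Happy path: 23:59:59.999999 as microseconds since midnight.
     assert(timeToMicros(23, 59, Decimal(BigDecimal("59.999999"), 16, 6)) ===
       localTimeToMicros(LocalTime.of(23, 59, 59, 999999000)))
     // The corner cases above must be rejected.
     Seq(
       (0, 0, "9999999999.999999"), // max value of Decimal(16, 6)
       (0, 0, "-999999999.999999"),
       (0, 0, "60.000000"),
       (-1, 0, "0.000000"), (24, 0, "0.000000"),
       (0, -1, "0.000000"), (0, 60, "0.000000")
     ).foreach { case (h, m, s) =>
       intercept[SparkDateTimeException] {
         timeToMicros(h, m, Decimal(BigDecimal(s), 16, 6))
       }
     }
   }
   ```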



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala:
##########
@@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils {
      throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit)
     }
   }
+
+  /**
+   * Converts separate time fields to a long that represents microseconds since the start of
+   * the day.
+   * @param hours the hour, from 0 to 23
+   * @param minutes the minute, from 0 to 59
+   * @param secsAndMicros the second, from 0 to 59.999999
+   * @return Time time represented as microseconds since the start of the day

Review Comment:
   ```suggestion
      * @return A time value represented as microseconds since the start of the day
   ```


