MaxGekk commented on code in PR #50269: URL: https://github.com/apache/spark/pull/50269#discussion_r2034758745
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala: ########## @@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder { } } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and second fields. For invalid inputs it will throw an error.", + arguments = """ + Arguments: + * hour - the hour to represent, from 0 to 23 + * minute - the minute to represent, from 0 to 59 + * second - the second to represent, from 0 to 59.999999 + """, + examples = """ + Examples: + > SELECT _FUNC_(6, 30, 45.887); + 06:30:45.887 + > SELECT _FUNC_(NULL, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.1.0") +// scalastyle:on line.size.limit +case class MakeTime( + hours: Expression, + minutes: Expression, + secsAndMicros: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6)) + override def children: Seq[Expression] = Seq(hours, minutes, secsAndMicros) + override def dataType: DataType = TimeType(TimeType.MAX_PRECISION) + override def prettyName: String = "make_time" + + override def replacement: Expression = StaticInvoke( + classOf[DateTimeUtils.type], + dataType, Review Comment: In the near future, the max precision can be changed to nanoseconds, and you will have to fix all such places. Let's make the current implementation consistent, and set the return data type to `TimeType(TimeType.MICROS_PRECISION)`. ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala: ########## @@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder { } } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and second fields. 
For invalid inputs it will throw an error.", + arguments = """ + Arguments: + * hour - the hour to represent, from 0 to 23 + * minute - the minute to represent, from 0 to 59 + * second - the second to represent, from 0 to 59.999999 + """, + examples = """ + Examples: + > SELECT _FUNC_(6, 30, 45.887); + 06:30:45.887 + > SELECT _FUNC_(NULL, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.1.0") +// scalastyle:on line.size.limit +case class MakeTime( + hours: Expression, + minutes: Expression, + secsAndMicros: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6)) + override def children: Seq[Expression] = Seq(hours, minutes, secsAndMicros) + override def dataType: DataType = TimeType(TimeType.MAX_PRECISION) Review Comment: When you extend `RuntimeReplaceable`, the `dataType()` method returns `dataType` of `replacement`, see: ```scala trait RuntimeReplaceable extends Expression { ... override def dataType: DataType = replacement.dataType ``` ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala: ########## @@ -290,3 +290,47 @@ object HourExpressionBuilder extends ExpressionBuilder { } } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(hour, minute, second) - Create time from hour, minute and second fields. 
For invalid inputs it will throw an error.", + arguments = """ + Arguments: + * hour - the hour to represent, from 0 to 23 + * minute - the minute to represent, from 0 to 59 + * second - the second to represent, from 0 to 59.999999 + """, + examples = """ + Examples: + > SELECT _FUNC_(6, 30, 45.887); + 06:30:45.887 + > SELECT _FUNC_(NULL, 30, 0); + NULL + """, + group = "datetime_funcs", + since = "4.1.0") +// scalastyle:on line.size.limit +case class MakeTime( + hours: Expression, + minutes: Expression, + secsAndMicros: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, DecimalType(16, 6)) Review Comment: Could you add a comment from `MakeTimestamp` regarding the type `DecimalType(16, 6)`: ``` // Accept `sec` as DecimalType to avoid loosing precision of microseconds while converting // them to the fractional part of `sec`. For accepts IntegerType as `sec` and integer can be // casted into decimal safely, we use DecimalType(16, 6) which is wider than DecimalType(10, 0). 
``` ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala: ########## @@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils { throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit) } } + + /** + * Converts separate time fields in a long that represents microseconds since the start of + * the day + * @param hours the hour, from 0 to 23 + * @param minutes the minute, from 0 to 59 + * @param secsAndMicros the second, from 0 to 59.999999 + * @return Time time represented as microseconds since the start of the day + */ + def timeToMicros(hours: Int, minutes: Int, secsAndMicros: Decimal): Long = { + assert(secsAndMicros.scale == 6, + s"Seconds fraction must have 6 digits for microseconds but got ${secsAndMicros.scale}") + val unscaledSecFrac = secsAndMicros.toUnscaledLong + val totalMicros = unscaledSecFrac.toInt // 8 digits cannot overflow Int Review Comment: Could you clarify why 8 digits if the input type is `DecimalType(16, 6)`, please. 
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala: ########## @@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils { throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit) } } + + /** + * Converts separate time fields in a long that represents microseconds since the start of + * the day + * @param hours the hour, from 0 to 23 + * @param minutes the minute, from 0 to 59 + * @param secsAndMicros the second, from 0 to 59.999999 + * @return Time time represented as microseconds since the start of the day + */ + def timeToMicros(hours: Int, minutes: Int, secsAndMicros: Decimal): Long = { + assert(secsAndMicros.scale == 6, + s"Seconds fraction must have 6 digits for microseconds but got ${secsAndMicros.scale}") + val unscaledSecFrac = secsAndMicros.toUnscaledLong + val totalMicros = unscaledSecFrac.toInt // 8 digits cannot overflow Int + val fullSecs = Math.floorDiv(totalMicros, MICROS_PER_SECOND.toInt) + val nanos = Math.floorMod(totalMicros, MICROS_PER_SECOND.toInt) * NANOS_PER_MICROS.toInt Review Comment: Why do you cast to `Int` explicitly? I guess ops over `Long` as much performant as over `Int` on modern CPUs. How about: ```scala try { val unscaledSecFrac = secsAndMicros.toUnscaledLong val fullSecs = Math.floorDiv(unscaledSecFrac, MICROS_PER_SECOND) val nanos = Math.floorMod(unscaledSecFrac, MICROS_PER_SECOND) * NANOS_PER_MICROS val lt = LocalTime.of(hours, minutes, fullSecs.toInt, nanos.toInt) localTimeToMicros(lt) } catch { case e @ (_: DateTimeException | _: ArithmeticException) => throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRangeWithoutSuggestion(e) } ``` Could you add tests for the function `timeToMicros` to `DateTimeUtilsSuite`, and check the corner cases like: - secsAndMicros = 9999999999.999999, the max value of the type DecimalType(16, 6). 
- secsAndMicros = -999999999.999999 - secsAndMicros = 60.0 - hours = -1 or 24 - minutes = -1 or 60 ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala: ########## @@ -763,4 +763,29 @@ object DateTimeUtils extends SparkDateTimeUtils { throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit) } } + + /** + * Converts separate time fields in a long that represents microseconds since the start of + * the day + * @param hours the hour, from 0 to 23 + * @param minutes the minute, from 0 to 59 + * @param secsAndMicros the second, from 0 to 59.999999 + * @return Time time represented as microseconds since the start of the day Review Comment: ```suggestion * @return A time value represented as microseconds since the start of the day ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org