This is an automated email from the ASF dual-hosted git repository.
rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4fdae714d9 [GLUTEN-10566][VL] Add Spark unix_timestamp support with
timestamp and format arguments (#10567)
4fdae714d9 is described below
commit 4fdae714d9a986833d6917c3efb0c81b02ed8864
Author: nimesh1601 <[email protected]>
AuthorDate: Thu Sep 4 15:03:03 2025 +0530
[GLUTEN-10566][VL] Add Spark unix_timestamp support with timestamp and
format arguments (#10567)
---
.../backendsapi/clickhouse/CHSparkPlanExecApi.scala | 12 ++++++++++++
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 8 ++++++++
.../gluten/expression/ExpressionTransformer.scala | 17 ++++++++++++++++-
.../functions/DateFunctionsValidateSuite.scala | 17 +++++++++++++++++
.../apache/gluten/backendsapi/SparkPlanExecApi.scala | 6 ++++++
.../gluten/expression/ExpressionConverter.scala | 20 +++++++-------------
.../apache/spark/sql/GlutenDateFunctionsSuite.scala | 16 ++++++++++++++++
7 files changed, 82 insertions(+), 14 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 2f443c2b2c..f327b3b00e 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -641,6 +641,18 @@ class CHSparkPlanExecApi extends SparkPlanExecApi with Logging {
CHTruncTimestampTransformer(substraitExprName, format, timestamp, timeZoneId, original)
}
+ override def genToUnixTimestampTransformer(
+ substraitExprName: String,
+ timeExp: ExpressionTransformer,
+ format: ExpressionTransformer,
+ original: Expression): ExpressionTransformer = {
+ GenericExpressionTransformer(
+ substraitExprName,
+ Seq(timeExp, format),
+ original
+ )
+ }
+
override def genDateDiffTransformer(
substraitExprName: String,
endDate: ExpressionTransformer,
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index af46ff2673..0fe000c5c6 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -1048,4 +1048,12 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
}
TimestampDiffTransformer(substraitExprName, extract.get, left, right, original)
}
+
+ override def genToUnixTimestampTransformer(
+ substraitExprName: String,
+ timeExp: ExpressionTransformer,
+ format: ExpressionTransformer,
+ original: Expression): ExpressionTransformer = {
+ ToUnixTimestampTransformer(substraitExprName, timeExp, format, original)
+ }
}
diff --git a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
index a5e77920e4..7567a35663 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
@@ -23,7 +23,7 @@ import org.apache.gluten.substrait.SubstraitContext
import org.apache.gluten.substrait.expression._
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types.{IntegerType, LongType}
+import org.apache.spark.sql.types.{IntegerType, LongType, TimestampType}
import java.lang.{Integer => JInteger}
import java.util.{ArrayList => JArrayList}
@@ -109,3 +109,18 @@ case class VeloxHashExpressionTransformer(
ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
}
}
+
+case class ToUnixTimestampTransformer(
+ substraitExprName: String,
+ timeExpTransformer: ExpressionTransformer,
+ formatTransformer: ExpressionTransformer,
+ original: Expression)
+ extends ExpressionTransformer {
+
+ override def children: Seq[ExpressionTransformer] = {
+ timeExpTransformer.dataType match {
+ case _: TimestampType => Seq(timeExpTransformer)
+ case _ => Seq(timeExpTransformer, formatTransformer)
+ }
+ }
+}
diff --git a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
index df761ce2df..81e1457021 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
@@ -471,4 +471,21 @@ abstract class DateFunctionsValidateSuite extends FunctionsValidateSuite {
}
}
}
+
+ test("unix_timestamp with timestamp and format - no fallback") {
+ withTempPath {
+ path =>
+ Seq(
+ (Timestamp.valueOf("2016-04-08 13:10:15"), "yyyy-MM-dd"),
+ (Timestamp.valueOf("2017-05-19 18:25:30"), "MM/dd/yyyy")
+ ).toDF("ts", "fmt").write.parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("unix_timestamp_test")
+
+ // Test unix_timestamp(timestamp, format) - should use native execution without fallback
+ runQueryAndCompare("SELECT unix_timestamp(ts, fmt) FROM unix_timestamp_test") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
}
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 1625cce062..58ad9d3e4b 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -470,6 +470,12 @@ trait SparkPlanExecApi {
TruncTimestampTransformer(substraitExprName, format, timestamp, original)
}
+ def genToUnixTimestampTransformer(
+ substraitExprName: String,
+ timeExp: ExpressionTransformer,
+ format: ExpressionTransformer,
+ original: Expression): ExpressionTransformer
+
def genDateDiffTransformer(
substraitExprName: String,
endDate: ExpressionTransformer,
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index fc1de383d0..4d62ae1804 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -277,24 +277,18 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformer0(r.child, attributeSeq, expressionsMap),
r)
case t: ToUnixTimestamp =>
- // The failOnError depends on the config for ANSI. ANSI is not supported currently.
- // And timeZoneId is passed to backend config.
- GenericExpressionTransformer(
+ BackendsApiManager.getSparkPlanExecApiInstance.genToUnixTimestampTransformer(
substraitExprName,
- Seq(
- replaceWithExpressionTransformer0(t.timeExp, attributeSeq, expressionsMap),
- replaceWithExpressionTransformer0(t.format, attributeSeq, expressionsMap)
- ),
+ replaceWithExpressionTransformer0(t.timeExp, attributeSeq, expressionsMap),
+ replaceWithExpressionTransformer0(t.format, attributeSeq, expressionsMap),
t
)
case u: UnixTimestamp =>
- GenericExpressionTransformer(
+ BackendsApiManager.getSparkPlanExecApiInstance.genToUnixTimestampTransformer(
substraitExprName,
- Seq(
- replaceWithExpressionTransformer0(u.timeExp, attributeSeq, expressionsMap),
- replaceWithExpressionTransformer0(u.format, attributeSeq, expressionsMap)
- ),
- ToUnixTimestamp(u.timeExp, u.format, u.timeZoneId, u.failOnError)
+ replaceWithExpressionTransformer0(u.timeExp, attributeSeq, expressionsMap),
+ replaceWithExpressionTransformer0(u.format, attributeSeq, expressionsMap),
+ u
)
case t: TruncTimestamp =>
BackendsApiManager.getSparkPlanExecApiInstance.genTruncTimestampTransformer(
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index f9c5995caf..082f06641b 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -125,6 +125,14 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
+ // Test unix_timestamp(timestamp, format) - format should be ignored
+ checkAnswer(
+ df.select(unix_timestamp(col("ts"), "yyyy-MM-dd")),
+ Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+ checkAnswer(
+ df.selectExpr("unix_timestamp(ts, 'invalid-format')"),
+ Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
val now = sql("select unix_timestamp()").collect().head.getLong(0)
checkAnswer(
sql(s"select timestamp_seconds($now)"),
@@ -187,6 +195,14 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
+ // Test to_unix_timestamp(timestamp, format) - format should be ignored
+ checkAnswer(
+ df.selectExpr("to_unix_timestamp(ts, 'yyyy-MM-dd')"),
+ Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+ checkAnswer(
+ df.selectExpr("to_unix_timestamp(ts, 'invalid-format')"),
+ Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')")
checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]