This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 4fdae714d9 [GLUTEN-10566][VL] Add Spark unix_timestamp support with timestamp and format arguments (#10567)
4fdae714d9 is described below

commit 4fdae714d9a986833d6917c3efb0c81b02ed8864
Author: nimesh1601 <[email protected]>
AuthorDate: Thu Sep 4 15:03:03 2025 +0530

    [GLUTEN-10566][VL] Add Spark unix_timestamp support with timestamp and format arguments (#10567)
---
 .../backendsapi/clickhouse/CHSparkPlanExecApi.scala  | 12 ++++++++++++
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala    |  8 ++++++++
 .../gluten/expression/ExpressionTransformer.scala    | 17 ++++++++++++++++-
 .../functions/DateFunctionsValidateSuite.scala       | 17 +++++++++++++++++
 .../apache/gluten/backendsapi/SparkPlanExecApi.scala |  6 ++++++
 .../gluten/expression/ExpressionConverter.scala      | 20 +++++++-------------
 .../apache/spark/sql/GlutenDateFunctionsSuite.scala  | 16 ++++++++++++++++
 7 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 2f443c2b2c..f327b3b00e 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -641,6 +641,18 @@ class CHSparkPlanExecApi extends SparkPlanExecApi with Logging {
     CHTruncTimestampTransformer(substraitExprName, format, timestamp, timeZoneId, original)
   }
 
+  override def genToUnixTimestampTransformer(
+      substraitExprName: String,
+      timeExp: ExpressionTransformer,
+      format: ExpressionTransformer,
+      original: Expression): ExpressionTransformer = {
+    GenericExpressionTransformer(
+      substraitExprName,
+      Seq(timeExp, format),
+      original
+    )
+  }
+
   override def genDateDiffTransformer(
       substraitExprName: String,
       endDate: ExpressionTransformer,
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index af46ff2673..0fe000c5c6 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -1048,4 +1048,12 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
     }
     TimestampDiffTransformer(substraitExprName, extract.get, left, right, original)
   }
+
+  override def genToUnixTimestampTransformer(
+      substraitExprName: String,
+      timeExp: ExpressionTransformer,
+      format: ExpressionTransformer,
+      original: Expression): ExpressionTransformer = {
+    ToUnixTimestampTransformer(substraitExprName, timeExp, format, original)
+  }
 }
diff --git a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
index a5e77920e4..7567a35663 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala
@@ -23,7 +23,7 @@ import org.apache.gluten.substrait.SubstraitContext
 import org.apache.gluten.substrait.expression._
 
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types.{IntegerType, LongType}
+import org.apache.spark.sql.types.{IntegerType, LongType, TimestampType}
 
 import java.lang.{Integer => JInteger}
 import java.util.{ArrayList => JArrayList}
@@ -109,3 +109,18 @@ case class VeloxHashExpressionTransformer(
     ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
   }
 }
+
+case class ToUnixTimestampTransformer(
+    substraitExprName: String,
+    timeExpTransformer: ExpressionTransformer,
+    formatTransformer: ExpressionTransformer,
+    original: Expression)
+  extends ExpressionTransformer {
+
+  override def children: Seq[ExpressionTransformer] = {
+    timeExpTransformer.dataType match {
+      case _: TimestampType => Seq(timeExpTransformer)
+      case _ => Seq(timeExpTransformer, formatTransformer)
+    }
+  }
+}
diff --git a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
index df761ce2df..81e1457021 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala
@@ -471,4 +471,21 @@ abstract class DateFunctionsValidateSuite extends FunctionsValidateSuite {
         }
     }
   }
+
+  test("unix_timestamp with timestamp and format - no fallback") {
+    withTempPath {
+      path =>
+        Seq(
+          (Timestamp.valueOf("2016-04-08 13:10:15"), "yyyy-MM-dd"),
+          (Timestamp.valueOf("2017-05-19 18:25:30"), "MM/dd/yyyy")
+        ).toDF("ts", "fmt").write.parquet(path.getCanonicalPath)
+
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("unix_timestamp_test")
+
+        // Test unix_timestamp(timestamp, format) - should use native execution without fallback
+        runQueryAndCompare("SELECT unix_timestamp(ts, fmt) FROM unix_timestamp_test") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
 }
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 1625cce062..58ad9d3e4b 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -470,6 +470,12 @@ trait SparkPlanExecApi {
     TruncTimestampTransformer(substraitExprName, format, timestamp, original)
   }
 
+  def genToUnixTimestampTransformer(
+      substraitExprName: String,
+      timeExp: ExpressionTransformer,
+      format: ExpressionTransformer,
+      original: Expression): ExpressionTransformer
+
   def genDateDiffTransformer(
       substraitExprName: String,
       endDate: ExpressionTransformer,
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index fc1de383d0..4d62ae1804 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -277,24 +277,18 @@ object ExpressionConverter extends SQLConfHelper with Logging {
          replaceWithExpressionTransformer0(r.child, attributeSeq, expressionsMap),
           r)
       case t: ToUnixTimestamp =>
-        // The failOnError depends on the config for ANSI. ANSI is not supported currently.
-        // And timeZoneId is passed to backend config.
-        GenericExpressionTransformer(
+        BackendsApiManager.getSparkPlanExecApiInstance.genToUnixTimestampTransformer(
           substraitExprName,
-          Seq(
-            replaceWithExpressionTransformer0(t.timeExp, attributeSeq, expressionsMap),
-            replaceWithExpressionTransformer0(t.format, attributeSeq, expressionsMap)
-          ),
+          replaceWithExpressionTransformer0(t.timeExp, attributeSeq, expressionsMap),
+          replaceWithExpressionTransformer0(t.format, attributeSeq, expressionsMap),
           t
         )
       case u: UnixTimestamp =>
-        GenericExpressionTransformer(
+        BackendsApiManager.getSparkPlanExecApiInstance.genToUnixTimestampTransformer(
           substraitExprName,
-          Seq(
-            replaceWithExpressionTransformer0(u.timeExp, attributeSeq, expressionsMap),
-            replaceWithExpressionTransformer0(u.format, attributeSeq, expressionsMap)
-          ),
-          ToUnixTimestamp(u.timeExp, u.format, u.timeZoneId, u.failOnError)
+          replaceWithExpressionTransformer0(u.timeExp, attributeSeq, expressionsMap),
+          replaceWithExpressionTransformer0(u.format, attributeSeq, expressionsMap),
+          u
         )
       case t: TruncTimestamp =>
         BackendsApiManager.getSparkPlanExecApiInstance.genTruncTimestampTransformer(
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index f9c5995caf..082f06641b 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -125,6 +125,14 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
             df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
             Seq(Row(secs(ts5.getTime)), Row(null)))
 
+          // Test unix_timestamp(timestamp, format) - format should be ignored
+          checkAnswer(
+            df.select(unix_timestamp(col("ts"), "yyyy-MM-dd")),
+            Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+          checkAnswer(
+            df.selectExpr("unix_timestamp(ts, 'invalid-format')"),
+            Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
           val now = sql("select unix_timestamp()").collect().head.getLong(0)
           checkAnswer(
             sql(s"select timestamp_seconds($now)"),
@@ -187,6 +195,14 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
             df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
             Seq(Row(secs(ts5.getTime)), Row(null)))
 
+          // Test to_unix_timestamp(timestamp, format) - format should be ignored
+          checkAnswer(
+            df.selectExpr("to_unix_timestamp(ts, 'yyyy-MM-dd')"),
+            Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+          checkAnswer(
+            df.selectExpr("to_unix_timestamp(ts, 'invalid-format')"),
+            Seq(Row(secs(ts1.getTime)), Row(secs(ts2.getTime))))
+
           val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')")
           checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to