This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new d2e0393e3 chore: Run Spark 4.0 SQL tests with native_datafusion scan (#3728)
d2e0393e3 is described below
commit d2e0393e38ef59f19efef008552baca9dc2cf2a2
Author: Andy Grove <[email protected]>
AuthorDate: Tue Mar 24 09:22:00 2026 -0700
chore: Run Spark 4.0 SQL tests with native_datafusion scan (#3728)
---
.github/workflows/spark_sql_test.yml | 3 +++
dev/diffs/4.0.1.diff | 38 ++++++++++++++----------------------
2 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml
index 3a763d321..8f48078ca 100644
--- a/.github/workflows/spark_sql_test.yml
+++ b/.github/workflows/spark_sql_test.yml
@@ -130,10 +130,13 @@ jobs:
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl:
'auto'}
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl:
'native_datafusion'}
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl:
'auto'}
+ - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl:
'native_datafusion'}
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
exclude:
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17,
scan-impl: 'auto'}
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * --
-l org.apache.spark.tags.ExtendedHiveTest -l
org.apache.spark.tags.SlowHiveTest"}
+ - config: {spark-short: '4.0', spark-full: '4.0.1', java: 17,
scan-impl: 'native_datafusion'}
+ module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * --
-l org.apache.spark.tags.ExtendedHiveTest -l
org.apache.spark.tags.SlowHiveTest"}
fail-fast: false
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
runs-on: ${{ matrix.os }}
diff --git a/dev/diffs/4.0.1.diff b/dev/diffs/4.0.1.diff
index 407807cac..a0b1e81d0 100644
--- a/dev/diffs/4.0.1.diff
+++ b/dev/diffs/4.0.1.diff
@@ -574,7 +574,7 @@ index 81713c777bc..b5f92ed9742 100644
assert(exchanges.size == 2)
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
-index 2c24cc7d570..3311e6e3773 100644
+index 2c24cc7d570..12096ea361e 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala
@@ -22,6 +22,7 @@ import org.scalatest.GivenWhenThen
@@ -669,7 +669,7 @@ index 2c24cc7d570..3311e6e3773 100644
test("static scan metrics",
- DisableAdaptiveExecution("DPP in AQE must reuse broadcast")) {
+ DisableAdaptiveExecution("DPP in AQE must reuse broadcast"),
-+
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311"))
{
++
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3442"))
{
withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true",
SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false",
SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") {
@@ -708,7 +708,7 @@ index 9c90e0105a4..fadf2f0f698 100644
test("SPARK-35884: Explain Formatted") {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
-index 9c529d14221..6cfd87ad864 100644
+index 9c529d14221..5c4e370dfff 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -33,6 +33,8 @@ import
org.apache.spark.sql.catalyst.expressions.{AttributeReference, GreaterTha
@@ -748,7 +748,7 @@ index 9c529d14221..6cfd87ad864 100644
Seq("parquet", "orc").foreach { format =>
- test(s"Spark native readers should respect spark.sql.caseSensitive -
${format}") {
+ test(s"Spark native readers should respect spark.sql.caseSensitive -
${format}",
-+
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311"))
{
++
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3760"))
{
withTempDir { dir =>
val tableName = s"spark_25132_${format}_native"
val tableDir = dir.getCanonicalPath + s"/$tableName"
@@ -2727,7 +2727,7 @@ index cd6f41b4ef4..4b6a17344bc 100644
ParquetOutputFormat.WRITER_VERSION ->
ParquetProperties.WriterVersion.PARQUET_2_0.toString
)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
-index 6080a5e8e4b..dc64436164f 100644
+index 6080a5e8e4b..cef477c8b4d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -38,6 +38,7 @@ import org.apache.parquet.schema.MessageType
@@ -2812,7 +2812,7 @@ index 6080a5e8e4b..dc64436164f 100644
- test("SPARK-25207: exception when duplicate fields in case-insensitive
mode") {
+ test("SPARK-25207: exception when duplicate fields in case-insensitive
mode",
-+
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311"))
{
++
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3760"))
{
withTempPath { dir =>
val count = 10
val tableName = "spark_25207"
@@ -3316,41 +3316,32 @@ index 0dd90925d3c..7d53ec845ef 100644
spark.range(10).selectExpr("id", "id % 3 as p")
.write.partitionBy("p").saveAsTable("testDataForScan")
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
-index 0ab8691801d..f1c4b3d92b1 100644
+index 0ab8691801d..b18a5bea944 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala
-@@ -18,6 +18,8 @@
+@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.python
import org.apache.spark.sql.catalyst.plans.logical.{ArrowEvalPython,
BatchEvalPython, Limit, LocalLimit}
-+import org.apache.spark.sql.IgnoreCometNativeDataFusion
+import org.apache.spark.sql.comet._
import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan,
SparkPlanTest}
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
-@@ -93,7 +95,8 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
- assert(arrowEvalNodes.size == 2)
- }
-
-- test("Python UDF should not break column pruning/filter pushdown -- Parquet
V1") {
-+ test("Python UDF should not break column pruning/filter pushdown -- Parquet
V1",
-+
IgnoreCometNativeDataFusion("https://github.com/apache/datafusion-comet/issues/3311"))
{
- withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") {
- withTempPath { f =>
- spark.range(10).select($"id".as("a"), $"id".as("b"))
-@@ -108,6 +111,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
+@@ -108,6 +109,8 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
val scanNodes = query.queryExecution.executedPlan.collect {
case scan: FileSourceScanExec => scan
+ case scan: CometScanExec => scan
++ case scan: CometNativeScanExec => scan
}
assert(scanNodes.length == 1)
assert(scanNodes.head.output.map(_.name) == Seq("a"))
-@@ -120,11 +124,16 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
+@@ -120,11 +123,18 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
val scanNodes = query.queryExecution.executedPlan.collect {
case scan: FileSourceScanExec => scan
+ case scan: CometScanExec => scan
++ case scan: CometNativeScanExec => scan
}
assert(scanNodes.length == 1)
// $"a" is not null and $"a" > 1
@@ -3359,13 +3350,14 @@ index 0ab8691801d..f1c4b3d92b1 100644
+ val dataFilters = scanNodes.head match {
+ case scan: FileSourceScanExec => scan.dataFilters
+ case scan: CometScanExec => scan.dataFilters
++ case scan: CometNativeScanExec => scan.dataFilters
+ }
+ assert(dataFilters.length == 2)
+ assert(dataFilters.flatMap(_.references.map(_.name)).distinct ==
Seq("a"))
}
}
}
-@@ -145,6 +154,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
+@@ -145,6 +155,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
val scanNodes = query.queryExecution.executedPlan.collect {
case scan: BatchScanExec => scan
@@ -3373,7 +3365,7 @@ index 0ab8691801d..f1c4b3d92b1 100644
}
assert(scanNodes.length == 1)
assert(scanNodes.head.output.map(_.name) == Seq("a"))
-@@ -157,6 +167,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
+@@ -157,6 +168,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with
SharedSparkSession {
val scanNodes = query.queryExecution.executedPlan.collect {
case scan: BatchScanExec => scan
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]