This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch test_codegen
in repository https://gitbox.apache.org/repos/asf/datasketches-spark.git

commit 20bfb8d914b206a48ddaa31839becda6d227bc11
Author: Jon <[email protected]>
AuthorDate: Wed Mar 5 00:15:59 2025 -0800

    Add explicit test forcing the use of codegen for UDFs which provide it
---
 .github/workflows/ci.yaml                          |  7 +++++-
 .../sql/datasketches/SparkSessionManager.scala     | 27 +++++++++++++++++-----
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 290d141..6e402bc 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -65,7 +65,12 @@ jobs:
 
       - name: Build and test
         run: >
-          sbt ++$SCALA_VEERSION --batch clean test
+          sbt ++$SCALA_VERSION --batch clean test
+
+      - name: Build and test with Codegen
+        run: >
+          FORCE_CODEGEN=true sbt ++$SCALA_VERSION --batch clean test
+
 
 # Architecture options: x86, x64, armv7, aarch64, ppc64le
 # setup-java@v4 has a "with cache" option
diff --git a/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala b/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala
index 1430de2..6d55eb4 100644
--- a/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala
+++ b/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala
@@ -30,16 +30,31 @@ import org.apache.spark.sql.SparkSession
 trait SparkSessionManager extends AnyFunSuite with BeforeAndAfterAll {
   Logger.getRootLogger().setLevel(Level.OFF)
 
-  lazy val spark: SparkSession = SparkSession
-      .builder()
-      .appName("datasketches-spark-tests")
-      .master("local[3]")
+  lazy val spark: SparkSession = {
+    // environment variable to force codegen for testing
+    // FORCE_CODEGEN means allow only codegen
+    // no argument means to use Spark's default (try codegen, fall back on error)
+    val forceCodegen = sys.env.getOrElse("FORCE_CODEGEN", "false").toBoolean
+
+    val builder = SparkSession.builder()
+    builder
+      .appName(s"datasketches-spark-tests")
+      .master("local[1]")
       .config("spark.driver.bindAddress", "localhost")
       .config("spark.driver.host", "localhost")
-      //.config("spark.sql.debug.codegen", "true")
-      .getOrCreate()
+
+    if (forceCodegen) {
+      builder
+        .config("spark.sql.codegen.wholeStage", "true")
+        .config("spark.sql.codegen.fallback", "false")
+    }
+
+    Logger.getRootLogger().info(s"Spark session started with codegen: $forceCodegen")
+    builder.getOrCreate()
+  }
 
   override def beforeAll(): Unit = {
+    super.beforeAll()
     spark.sparkContext.setLogLevel("OFF")
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to