This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch test_improvements in repository https://gitbox.apache.org/repos/asf/datasketches-spark.git
commit 6baf9241c018a95b720babca63f5e1f53247ed65 Author: Jon <[email protected]> AuthorDate: Sat Mar 8 11:02:23 2025 -0800 Should improve codegen on/off control, force use of both in workflow --- .github/workflows/ci.yaml | 17 ++++------------- .../spark/sql/datasketches/SparkSessionManager.scala | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6e402bc..0f9e367 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -63,19 +63,10 @@ jobs: echo Scala version: $SCALA_VERSION echo Spark version: $SPARK_VERSION - - name: Build and test + - name: Build and test interpreted path run: > - sbt ++$SCALA_VERSION --batch clean test + CODEGEN_FACTORYMODE=NO_CODEGEN sbt ++$SCALA_VERSION --batch clean test - - name: Build and test with Codegen + - name: Build and test codegen path run: > - FORCE_CODEGEN=true sbt ++$SCALA_VERSION --batch clean test - - -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v4 has a "with cache" option -# Lifecycle: validate, compile, test, package, verify, install, deploy -# -B batch mode, never stops for user input -# -V show Version without stopping -# -X debug mode -# -q quiet, only show errors \ No newline at end of file + CODEGEN_FACTORYMODE=ONLY_CODEGEN sbt ++$SCALA_VERSION --batch clean test diff --git a/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala b/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala index 6d55eb4..6ea1151 100644 --- a/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala +++ b/src/test/scala/org/apache/spark/sql/datasketches/SparkSessionManager.scala @@ -31,10 +31,11 @@ trait SparkSessionManager extends AnyFunSuite with BeforeAndAfterAll { Logger.getRootLogger().setLevel(Level.OFF) lazy val spark: SparkSession = { - // environment variable to force codegen for testing - // FORCE_CODEGEN means allow only codegen - // no argument means to use Spark's default (try codegen, fall back on error) - val forceCodegen = sys.env.getOrElse("FORCE_CODEGEN", "false").toBoolean + // environment variable to set codegen state + // FALLBACK: (default) try codegen else fall back to interpreted + // CODEGEN_ONLY: only use codegen path + // NO_CODEGEN: only use interpreted path + val codegenState = sys.env.getOrElse("CODEGEN_FACTORYMODE", "FALLBACK") val builder = SparkSession.builder() builder @@ -42,14 +43,20 @@ trait SparkSessionManager extends AnyFunSuite with BeforeAndAfterAll { .master("local[1]") .config("spark.driver.bindAddress", "localhost") .config("spark.driver.host", "localhost") + .config("spark.sql.codegen.factoryMode", codegenState) - if (forceCodegen) { + // additional flags used for codegen state + if ("ONLY_CODEGEN".equals(codegenState)) { builder .config("spark.sql.codegen.wholeStage", "true") .config("spark.sql.codegen.fallback", "false") + } else if ("NO_CODEGEN".equals(codegenState)) { + builder + .config("spark.sql.codegen.wholeStage", "false") + } - Logger.getRootLogger().info(s"Spark session started with codegen: $forceCodegen") + Logger.getRootLogger().info(s"Spark session started with codegen.factoryMode: $codegenState") builder.getOrCreate() } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
