This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 01a7b480ed [GH-2300] Usage of transformUpWithPruning prevents sedona
from working on Databricks (#2301)
01a7b480ed is described below
commit 01a7b480edf64db613975fc4e01c930b57791a9d
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Wed Aug 20 14:25:10 2025 +0800
[GH-2300] Usage of transformUpWithPruning prevents sedona from working on
Databricks (#2301)
---
.../spark/sql/udf/ExtractSedonaUDFRule.scala | 25 ++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/spark/spark-3.5/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala b/spark/spark-3.5/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
index 03e10a1602..3d3301580c 100644
--- a/spark/spark-3.5/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
+++ b/spark/spark-3.5/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
@@ -19,6 +19,7 @@
package org.apache.spark.sql.udf
import org.apache.sedona.sql.UDF.PythonEvalType
+import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions.{AttributeReference,
Expression, ExpressionSet, PythonUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project,
Subquery}
import org.apache.spark.sql.catalyst.rules.Rule
@@ -28,7 +29,7 @@ import scala.collection.mutable
// That rule extracts scalar Python UDFs, currently Apache Spark has
// assert on types which blocks using the vectorized udfs with geometry type
-class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
+class ExtractSedonaUDFRule extends Rule[LogicalPlan] with Logging {
private def hasScalarPythonUDF(e: Expression): Boolean = {
e.exists(PythonUDF.isScalarPythonUDF)
@@ -73,14 +74,30 @@ class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
expressions.flatMap(collectEvaluableUDFs)
}
+ private var hasFailedBefore: Boolean = false
+
def apply(plan: LogicalPlan): LogicalPlan = plan match {
case s: Subquery if s.correlated => plan
case _ =>
- plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
- case p: SedonaArrowEvalPython => p
+ try {
+ plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
+ case p: SedonaArrowEvalPython => p
- case plan: LogicalPlan => extract(plan)
+ case plan: LogicalPlan => extract(plan)
+ }
+ } catch {
+ case e: Throwable =>
+ if (!hasFailedBefore) {
+ log.warn(
+ s"Vectorized UDF feature won't be available due to plan transformation error.")
+ log.warn(
+ s"Failed to extract Sedona UDFs from plan: ${plan.treeString}\n" +
+ s"Exception: ${e.getMessage}",
+ e)
+ hasFailedBefore = true
+ }
+ plan
}
}