(sedona) branch master updated: [GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)

jiayu Thu, 13 Nov 2025 09:45:56 -0800

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git



The following commit(s) were added to refs/heads/master by this push:
     new 46a8432367 [GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)
46a8432367 is described below

commit 46a8432367b9da6f288840f3fd3e1de4555e458b
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Fri Nov 14 01:45:23 2025 +0800

    [GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)
---
 .../scala/org/apache/sedona/sql/UDF/Catalog.scala  | 32 ++++++++++++++++------
 .../spark/sql/udf/ExtractSedonaUDFRule.scala       | 25 ++++++++++++++---
 2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 1063bf63e2..f79c3762db 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -18,6 +18,7 @@
  */
 package org.apache.sedona.sql.UDF
 
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
 import org.apache.spark.sql.sedona_sql.expressions.raster._
@@ -26,7 +27,7 @@ import org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromTex
 import org.locationtech.jts.geom.Geometry
 import org.locationtech.jts.operation.buffer.BufferParameters
 
-object Catalog extends AbstractCatalog {
+object Catalog extends AbstractCatalog with Logging {
 
   override val expressions: Seq[FunctionDescription] = Seq(
     // Expression for vectors
@@ -350,15 +351,30 @@ object Catalog extends AbstractCatalog {
     function[RS_ReprojectMatch]("nearestneighbor"),
     function[RS_FromNetCDF](),
     function[RS_NetCDFInfo](),
-    // geostats functions
-    function[ST_DBSCAN](),
-    function[ST_LocalOutlierFactor](),
-    function[ST_GLocal](),
-    function[ST_BinaryDistanceBandColumn](),
-    function[ST_WeightedDistanceBandColumn](),
+    // geom <-> geog conversion functions
     function[ST_GeogToGeometry](),
-    function[ST_GeomToGeography]())
+    function[ST_GeomToGeography]()) ++ geoStatsFunctions()
 
   val aggregateExpressions: Seq[Aggregator[Geometry, _, _]] =
     Seq(new ST_Envelope_Aggr, new ST_Intersection_Aggr, new ST_Union_Aggr())
+
+  private def geoStatsFunctions(): Seq[FunctionDescription] = {
+    // Try loading geostats functions. Return a seq of geo-stats functions. If any error occurs,
+    // return an empty seq to skip registering these functions.
+    // This is for fixing a compatibility issue with DBR 17.3 LTS. See https://github.com/apache/sedona/issues/2472
+    try {
+      Seq(
+        function[ST_DBSCAN](),
+        function[ST_LocalOutlierFactor](),
+        function[ST_GLocal](),
+        function[ST_BinaryDistanceBandColumn](),
+        function[ST_WeightedDistanceBandColumn]())
+    } catch {
+      case e: Throwable =>
+        log.warn(
+          "GEO stats functions are not available due to Spark/DBR compatibility issues.",
+          e)
+        Seq.empty
+    }
+  }
 }
diff --git a/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala b/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
index 03e10a1602..3d3301580c 100644
--- a/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
+++ b/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
@@ -19,6 +19,7 @@
 package org.apache.spark.sql.udf
 
 import org.apache.sedona.sql.UDF.PythonEvalType
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionSet, PythonUDF}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Subquery}
 import org.apache.spark.sql.catalyst.rules.Rule
@@ -28,7 +29,7 @@ import scala.collection.mutable
 
 // That rule extracts scalar Python UDFs, currently Apache Spark has
 // assert on types which blocks using the vectorized udfs with geometry type
-class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
+class ExtractSedonaUDFRule extends Rule[LogicalPlan] with Logging {
 
   private def hasScalarPythonUDF(e: Expression): Boolean = {
     e.exists(PythonUDF.isScalarPythonUDF)
@@ -73,14 +74,30 @@ class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
     expressions.flatMap(collectEvaluableUDFs)
   }
 
+  private var hasFailedBefore: Boolean = false
+
   def apply(plan: LogicalPlan): LogicalPlan = plan match {
     case s: Subquery if s.correlated => plan
 
     case _ =>
-      plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
-        case p: SedonaArrowEvalPython => p
+      try {
+        plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
+          case p: SedonaArrowEvalPython => p
 
-        case plan: LogicalPlan => extract(plan)
+          case plan: LogicalPlan => extract(plan)
+        }
+      } catch {
+        case e: Throwable =>
+          if (!hasFailedBefore) {
+            log.warn(
+              s"Vectorized UDF feature won't be available due to plan transformation error.")
+            log.warn(
+              s"Failed to extract Sedona UDFs from plan: ${plan.treeString}\n" +
+                s"Exception: ${e.getMessage}",
+              e)
+            hasFailedBefore = true
+          }
+          plan
       }
   }

(sedona) branch master updated: [GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)

Reply via email to