This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 46a8432367 [GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)
46a8432367 is described below
commit 46a8432367b9da6f288840f3fd3e1de4555e458b
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Fri Nov 14 01:45:23 2025 +0800
[GH-2472] Fix compatibility issue with DBR 17.3 LTS (#2499)
---
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 32 ++++++++++++++++------
.../spark/sql/udf/ExtractSedonaUDFRule.scala | 25 ++++++++++++++---
2 files changed, 45 insertions(+), 12 deletions(-)
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index 1063bf63e2..f79c3762db 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -18,6 +18,7 @@
*/
package org.apache.sedona.sql.UDF
+import org.apache.spark.internal.Logging
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
import org.apache.spark.sql.sedona_sql.expressions.raster._
@@ -26,7 +27,7 @@ import org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromTex
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.operation.buffer.BufferParameters
-object Catalog extends AbstractCatalog {
+object Catalog extends AbstractCatalog with Logging {
override val expressions: Seq[FunctionDescription] = Seq(
// Expression for vectors
@@ -350,15 +351,30 @@ object Catalog extends AbstractCatalog {
function[RS_ReprojectMatch]("nearestneighbor"),
function[RS_FromNetCDF](),
function[RS_NetCDFInfo](),
- // geostats functions
- function[ST_DBSCAN](),
- function[ST_LocalOutlierFactor](),
- function[ST_GLocal](),
- function[ST_BinaryDistanceBandColumn](),
- function[ST_WeightedDistanceBandColumn](),
+ // geom <-> geog conversion functions
function[ST_GeogToGeometry](),
- function[ST_GeomToGeography]())
+ function[ST_GeomToGeography]()) ++ geoStatsFunctions()
val aggregateExpressions: Seq[Aggregator[Geometry, _, _]] =
Seq(new ST_Envelope_Aggr, new ST_Intersection_Aggr, new ST_Union_Aggr())
+
+ private def geoStatsFunctions(): Seq[FunctionDescription] = {
+ // Try loading geostats functions. Return a seq of geo-stats functions. If any error occurs,
+ // return an empty seq to skip registering these functions.
+ // This is for fixing a compatibility issue with DBR 17.3 LTS. See https://github.com/apache/sedona/issues/2472
+ try {
+ Seq(
+ function[ST_DBSCAN](),
+ function[ST_LocalOutlierFactor](),
+ function[ST_GLocal](),
+ function[ST_BinaryDistanceBandColumn](),
+ function[ST_WeightedDistanceBandColumn]())
+ } catch {
+ case e: Throwable =>
+ log.warn(
+ "GEO stats functions are not available due to Spark/DBR compatibility issues.",
+ e)
+ Seq.empty
+ }
+ }
}
diff --git a/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala b/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
index 03e10a1602..3d3301580c 100644
--- a/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
+++ b/spark/spark-4.0/src/main/scala/org/apache/spark/sql/udf/ExtractSedonaUDFRule.scala
@@ -19,6 +19,7 @@
package org.apache.spark.sql.udf
import org.apache.sedona.sql.UDF.PythonEvalType
+import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionSet, PythonUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Subquery}
import org.apache.spark.sql.catalyst.rules.Rule
@@ -28,7 +29,7 @@ import scala.collection.mutable
// That rule extracts scalar Python UDFs, currently Apache Spark has
// assert on types which blocks using the vectorized udfs with geometry type
-class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
+class ExtractSedonaUDFRule extends Rule[LogicalPlan] with Logging {
private def hasScalarPythonUDF(e: Expression): Boolean = {
e.exists(PythonUDF.isScalarPythonUDF)
@@ -73,14 +74,30 @@ class ExtractSedonaUDFRule extends Rule[LogicalPlan] {
expressions.flatMap(collectEvaluableUDFs)
}
+ private var hasFailedBefore: Boolean = false
+
def apply(plan: LogicalPlan): LogicalPlan = plan match {
case s: Subquery if s.correlated => plan
case _ =>
- plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
- case p: SedonaArrowEvalPython => p
+ try {
+ plan.transformUpWithPruning(_.containsPattern(PYTHON_UDF)) {
+ case p: SedonaArrowEvalPython => p
- case plan: LogicalPlan => extract(plan)
+ case plan: LogicalPlan => extract(plan)
+ }
+ } catch {
+ case e: Throwable =>
+ if (!hasFailedBefore) {
+ log.warn(
+ s"Vectorized UDF feature won't be available due to plan transformation error.")
+ log.warn(
+ s"Failed to extract Sedona UDFs from plan: ${plan.treeString}\n" +
+ s"Exception: ${e.getMessage}",
+ e)
+ hasFailedBefore = true
+ }
+ plan
}
}