This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new b46c010e78 [GH-2419] Register Sedona functions as built-in functions to support permanent VIEW creation (#2420)
b46c010e78 is described below
commit b46c010e7878f132c01573ba4a7a70e19ebd2436
Author: Feng Zhang <[email protected]>
AuthorDate: Wed Oct 22 11:10:59 2025 -0700
[GH-2419] Register Sedona functions as built-in functions to support permanent VIEW creation (#2420)
---
.../apache/sedona/sql/UDF/AbstractCatalog.scala | 16 +++-
.../org/apache/sedona/sql/functionTestScala.scala | 86 +++++++++++++++++++++-
2 files changed, 99 insertions(+), 3 deletions(-)
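A minimal sketch of the usage this change enables (not part of the commit; the object, table, and view names below are illustrative). With the functions registered in FunctionRegistry.builtin rather than only as session-scoped temporary UDFs, a permanent, catalog-backed VIEW can reference Sedona expressions:

    import org.apache.sedona.spark.SedonaContext

    object PermanentViewSketch extends App {
      // Build a Sedona-enabled SparkSession; SedonaContext.create registers the SQL functions.
      val sedona = SedonaContext.create(
        SedonaContext.builder().master("local[*]").getOrCreate())

      // A small source table with a geometry column.
      sedona
        .sql("SELECT ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))') AS geom")
        .write
        .saveAsTable("buildings")

      // A permanent view over a Sedona aggregate; per this commit, such views previously
      // could not be created because the functions were registered only as temporary UDFs.
      sedona.sql("""
        CREATE VIEW buildings_envelope AS
        SELECT ST_Envelope_Aggr(geom) AS envelope FROM buildings
      """)

      sedona.sql("SELECT * FROM buildings_envelope").show()
    }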
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/AbstractCatalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/AbstractCatalog.scala
index 6da8f56c32..6e6d13e023 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/AbstractCatalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/AbstractCatalog.scala
@@ -20,6 +20,7 @@ package org.apache.sedona.sql.UDF
import org.apache.spark.sql.{SQLContext, SparkSession, functions}
import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionInfo, Literal}
import org.apache.spark.sql.expressions.Aggregator
@@ -86,9 +87,20 @@ abstract class AbstractCatalog {
functionIdentifier,
expressionInfo,
functionBuilder)
+ FunctionRegistry.builtin.registerFunction(
+ functionIdentifier,
+ expressionInfo,
+ functionBuilder)
+ }
+ aggregateExpressions.foreach { f =>
+ sparkSession.udf.register(f.getClass.getSimpleName, functions.udaf(f))
+ FunctionRegistry.builtin.registerFunction(
+ FunctionIdentifier(f.getClass.getSimpleName),
+ new ExpressionInfo(f.getClass.getCanonicalName, null, f.getClass.getSimpleName),
+ (_: Seq[Expression]) =>
+ throw new UnsupportedOperationException(
+ s"Aggregate function ${f.getClass.getSimpleName} cannot be used as a regular function"))
}
- aggregateExpressions.foreach(f =>
- sparkSession.udf.register(f.getClass.getSimpleName, functions.udaf(f)))
}
def dropAll(sparkSession: SparkSession): Unit = {
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 706612304c..f1311a03d1 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -19,6 +19,7 @@
package org.apache.sedona.sql
import org.apache.commons.codec.binary.Hex
+import org.apache.commons.io.FileUtils
import org.apache.sedona.common.FunctionsGeoTools
import org.apache.sedona.sql.implicits._
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
@@ -35,7 +36,8 @@ import org.geotools.api.referencing.FactoryException
import org.scalatest.{GivenWhenThen, Matchers}
import org.xml.sax.InputSource
-import java.io.StringReader
+import java.io.{File, StringReader}
+import java.nio.file.Files
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.xpath.XPathFactory
@@ -4130,4 +4132,86 @@ class functionTestScala
squareWithTwoHolesSimplified.first().getAs[org.locationtech.jts.geom.Geometry](0)
assert(simplifiedMedialAxis != null, "Simplified medial axis should not be null")
}
+
+ it("Test that permanent VIEW creation works with multiple Sedona functions") {
+ val timestamp = System.currentTimeMillis()
+ val tmpDir: String =
+ Files.createTempDirectory("sedona_geoparquet_test_").toFile.getAbsolutePath
+
+ val buildings = sparkSession.sql("""
+ SELECT
+ ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))') as geom,
+ 'Building 1' as PROP_ADDR
+ UNION ALL
+ SELECT
+ ST_GeomFromWKT('POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))') as geom,
+ 'Building 2' as PROP_ADDR
+ """)
+ buildings.write
+ .mode("overwrite")
+ .option("path", s"$tmpDir/sedona_test_${timestamp}_buildings")
+ .saveAsTable("nyc_buildings_geom_test")
+
+ val zones = sparkSession.sql("""
+ SELECT
+ ST_GeomFromWKT('POLYGON((0 0, 5 0, 5 5, 0 5, 0 0))') as zone_geom,
+ 100.0 as elevation
+ """)
+ zones.write
+ .mode("overwrite")
+ .option("path", s"$tmpDir/sedona_test_${timestamp}_zones")
+ .saveAsTable("elevation_zones_test")
+
+ // Attempt to create a permanent VIEW with multiple Sedona functions
+ sparkSession.sql("""
+ CREATE VIEW nyc_buildings_with_functions AS
+ SELECT * FROM (
+ SELECT
+ nyc_buildings_geom_test.PROP_ADDR AS name,
+ nyc_buildings_geom_test.geom AS building_geom,
+ avg(elevation_zones_test.elevation) AS elevation
+ FROM
+ nyc_buildings_geom_test
+ JOIN
+ elevation_zones_test
+ ON
+ st_intersects(nyc_buildings_geom_test.geom, elevation_zones_test.zone_geom)
+ GROUP BY
+ nyc_buildings_geom_test.PROP_ADDR, nyc_buildings_geom_test.geom
+ )
+ WHERE elevation > 0
+ """)
+
+ // Query the view and assert results
+ val result = sparkSession.sql("SELECT * FROM nyc_buildings_with_functions").collect()
+ assert(result.length == 2, s"Expected 2 rows, but got ${result.length}")
+
+ // Assert both buildings are in the result
+ val buildingNames = result.map(_.getString(0)).toSet
+ assert(buildingNames.contains("Building 1"), "Building 1 should be in the result")
+ assert(buildingNames.contains("Building 2"), "Building 2 should be in the result")
+
+ sparkSession.sql("""
+ CREATE VIEW nyc_buildings_envelope_aggr_functions AS
+ SELECT
+ ST_Envelope_Aggr(nyc_buildings_geom_test.geom) AS building_geom_envelope
+ FROM
+ nyc_buildings_geom_test
+ """)
+
+ // Query the aggregate view and assert results
+ val result_aggr =
+ sparkSession.sql("SELECT * FROM nyc_buildings_envelope_aggr_functions").collect()
+ assert(result_aggr.length == 1, s"Expected 1 row, but got ${result_aggr.length}")
+
+ // Assert that the views were created
+ val views = sparkSession.sql("SHOW VIEWS").collect()
+ val view1Exists = views.exists(row => row.getString(1) == "nyc_buildings_with_functions")
+ assert(view1Exists, "View 'nyc_buildings_with_functions' should be created")
+ val view2Exists =
+ views.exists(row => row.getString(1) == "nyc_buildings_envelope_aggr_functions")
+ assert(view2Exists, "View 'nyc_buildings_envelope_aggr_functions' should be created")
+
+ FileUtils.deleteDirectory(new File(tmpDir))
+ }
}