This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 00749a45f8 [SEDONA-700] Fix ST_KNN fails on null and empty geometries (#1763)
00749a45f8 is described below

commit 00749a45f842e67808f9669d6cfc0142c332381c
Author: Feng Zhang <[email protected]>
AuthorDate: Thu Jan 16 21:45:12 2025 -0800

    [SEDONA-700] Fix ST_KNN fails on null and empty geometries (#1763)
    
    * [SEDONA-700] Fix ST_KNN fails on null and empty geometries
    
    * fix formatting issue
---
 .../spatialPartitioning/quadtree/ExtendedQuadTree.java  |  5 ++++-
 .../apache/sedona/sql/utils/GeometrySerializer.scala    |  5 ++++-
 .../test/scala/org/apache/sedona/sql/KnnJoinSuite.scala | 17 +++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
index 73169363ee..9925e93f8a 100644
--- a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
+++ b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/quadtree/ExtendedQuadTree.java
@@ -146,8 +146,11 @@ public class ExtendedQuadTree<T> extends PartitioningUtils implements Serializab
 
       final Set<Tuple2<Integer, Geometry>> result = new HashSet<>();
       for (QuadRectangle rectangle : matchedPartitions) {
+        // Ignore null or empty point
+        if (point == null || point.isEmpty()) break;
+
         // For points, make sure to return only one partition
-        if (point != null && !(new HalfOpenRectangle(rectangle.getEnvelope())).contains(point)) {
+        if (!(new HalfOpenRectangle(rectangle.getEnvelope())).contains(point)) {
           continue;
         }
 
diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala b/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
index a13c181aa2..a75a88f7ba 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/utils/GeometrySerializer.scala
@@ -19,7 +19,7 @@
 package org.apache.sedona.sql.utils
 
 import org.apache.sedona.common.geometrySerde
-import org.locationtech.jts.geom.Geometry
+import org.locationtech.jts.geom.{Geometry, GeometryFactory}
 
 /**
  * SerDe using the WKB reader and writer objects
@@ -47,6 +47,9 @@ object GeometrySerializer {
    *   JTS geometry
    */
   def deserialize(value: Array[Byte]): Geometry = {
+    if (value == null) {
+      return new GeometryFactory().createGeometryCollection()
+    }
     geometrySerde.GeometrySerializer.deserialize(value)
   }
 }
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
index f3b07c2501..ab2c64898a 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/KnnJoinSuite.scala
@@ -441,6 +441,23 @@ class KnnJoinSuite extends TestBaseScala with TableDrivenPropertyChecks {
         resultAll.mkString should be("[0,6][0,7]")
       }
     }
+
+    it("KNN Join with exact algorithms should not fail with null geometries") {
+      val df1 = sparkSession.sql(
+        "SELECT ST_GeomFromText(col1) as geom1 from values ('POINT (0.0 0.0)'), (null)")
+      val df2 = sparkSession.sql("SELECT ST_Point(0.0, 0.0) as geom2")
+      df1.cache()
+      df2.cache()
+      df1.join(df2, expr("ST_KNN(geom1, geom2, 1)")).count() should be(1)
+    }
+
+    it("KNN Join with exact algorithms should not fail with empty geometries") {
+      val df1 = sparkSession.sql("SELECT ST_GeomFromText('POINT EMPTY') as geom1")
+      val df2 = sparkSession.sql("SELECT ST_Point(0.0, 0.0) as geom2")
+      df1.cache()
+      df2.cache()
+      df1.join(df2, expr("ST_KNN(geom1, geom2, 1)")).count() should be(0)
+    }
   }
 
   private def withOptimizationMode(mode: String)(body: => Unit): Unit = {

Reply via email to