This is an automated email from the ASF dual-hosted git repository.

imbruced pushed a commit to branch fix-reading-geoparquet-when-filtering-and-bbox-is-empty
in repository https://gitbox.apache.org/repos/asf/sedona.git

commit 8cf69352780c7255308c2139d733792913def652
Author: pawelkocinski <[email protected]>
AuthorDate: Thu Nov 14 13:01:29 2024 +0100

    Fix reading GeoParquet files when filtering and the bbox metadata is empty.
---
 .../datasources/parquet/GeoParquetSpatialFilter.scala   |   4 ++++
 .../test/resources/geoparquet/overture/bbox.geoparquet  | Bin 0 -> 24496 bytes
 .../scala/org/apache/sedona/sql/geoparquetIOTests.scala |  14 +++++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
index 5aa782e5bd..ca932c6b34 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
@@ -69,6 +69,10 @@ object GeoParquetSpatialFilter {
     def evaluate(columns: Map[String, GeometryFieldMetaData]): Boolean = {
       columns.get(columnName).forall { column =>
         val bbox = column.bbox
+        if (bbox.isEmpty) {
+          return true
+        }
+
         val columnEnvelope =
          queryWindow.getFactory.toGeometry(new Envelope(bbox(0), bbox(2), bbox(1), bbox(3)))
         predicateType match {
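
For context: the per-column GeoParquet metadata may carry an empty bbox, in
which case no envelope can be built for file-level pruning and indexing into
the bbox fails. The guard above conservatively keeps the file so the
predicate is still evaluated row by row. A minimal, self-contained sketch of
that logic follows; GeometryFieldMetaData is a simplified stand-in here and
fileMayContainMatches is a hypothetical helper, not the actual Sedona classes.

    import org.locationtech.jts.geom.{Envelope, Geometry, GeometryFactory}

    object BboxGuardSketch {
      // Stand-in for the real per-column metadata; bbox may be empty when
      // the writer did not record one.
      final case class GeometryFieldMetaData(bbox: Seq[Double])

      private val factory = new GeometryFactory()

      // Hypothetical helper mirroring the guarded evaluation in this patch.
      def fileMayContainMatches(
          column: GeometryFieldMetaData,
          queryWindow: Geometry): Boolean = {
        val bbox = column.bbox
        if (bbox.isEmpty) {
          // No bbox to prune on: keep the file and let row-level
          // filtering decide.
          true
        } else {
          // bbox is [minX, minY, maxX, maxY]; the JTS Envelope
          // constructor takes (x1, x2, y1, y2).
          val columnEnvelope = factory.toGeometry(
            new Envelope(bbox(0), bbox(2), bbox(1), bbox(3)))
          queryWindow.intersects(columnEnvelope)
        }
      }
    }
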
diff --git a/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet
new file mode 100644
index 0000000000..b5393d3309
Binary files /dev/null and b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet differ
diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
index a6e74730a0..c1d49586d8 100644
--- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
+++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
@@ -57,7 +57,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
   val legacyparquetdatalocation: String =
     resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
   val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
-
+  val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"
   override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))
 
   describe("GeoParquet IO tests") {
@@ -761,6 +761,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
     }
   }
 
+  describe("loading one file geoparquet and filtering") {
+    it("filtering one file geoparquet") {
+      val numberOfRecords = sparkSession.read
+        .format("geoparquet")
+        .load(overtureBBOX)
+        .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
+        .count()
+
+      assert(numberOfRecords == 9)
+    }
+  }
+
   def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
     val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
     parquetFiles.foreach { filePath =>

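The new test boils down to the query below. A minimal usage sketch, assuming
a Sedona-enabled SparkSession (created here via SedonaContext) and a
placeholder file path; in the test itself the session comes from
TestBaseScala and the path points at the bundled bbox.geoparquet:

    import org.apache.sedona.spark.SedonaContext

    val spark = SedonaContext.create(
      SedonaContext.builder().master("local[*]").getOrCreate())

    // With the fix, a file whose GeoParquet metadata has an empty bbox no
    // longer breaks file-level pruning; the predicate is evaluated per row.
    val numberOfRecords = spark.read
      .format("geoparquet")
      .load("/path/to/bbox.geoparquet") // placeholder path
      .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
      .count()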