This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 59aa504c48 [SEDONA-673] Fix issue when loading geoparquet file without bbox metadata. (#1681)
59aa504c48 is described below

commit 59aa504c48c5879ab138bbbc9c13cdaea00f9443
Author: Paweł Tokaj <[email protected]>
AuthorDate: Fri Nov 15 19:51:23 2024 +0100

    [SEDONA-673] Fix issue when loading geoparquet file without bbox metadata. (#1681)
    
    * Fix issue when loading geoparquet file.
    
    * Fix issue when loading geoparquet file.
---
 .../datasources/parquet/GeoParquetSpatialFilter.scala   |   4 ++++
 .../test/resources/geoparquet/overture/bbox.geoparquet  | Bin 0 -> 24496 bytes
 .../scala/org/apache/sedona/sql/geoparquetIOTests.scala |  13 +++++++++++++
 .../scala/org/apache/sedona/sql/geoparquetIOTests.scala |  13 +++++++++++++
 .../scala/org/apache/sedona/sql/geoparquetIOTests.scala |  14 +++++++++++++-
 5 files changed, 43 insertions(+), 1 deletion(-)
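
Why this matters: evaluate() indexed straight into the per-column bbox
metadata, so a GeoParquet file whose column metadata carries no bbox entry
(such as the Overture sample added below) failed as soon as a spatial
predicate was pushed down. A minimal reproduction sketch; the path is a
hypothetical placeholder:

    // Any GeoParquet file lacking bbox column metadata triggers the issue.
    val df = sparkSession.read
      .format("geoparquet")
      .load("/tmp/bbox-less.geoparquet") // hypothetical path
    // Pre-patch, pushing this filter down indexed an empty bbox Seq and
    // failed; post-patch, the scan simply proceeds unpruned.
    df.where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))").count()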

diff --git a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
index 5aa782e5bd..ca932c6b34 100644
--- a/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
+++ b/spark/common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/GeoParquetSpatialFilter.scala
@@ -69,6 +69,10 @@ object GeoParquetSpatialFilter {
     def evaluate(columns: Map[String, GeometryFieldMetaData]): Boolean = {
       columns.get(columnName).forall { column =>
         val bbox = column.bbox
+        if (bbox.isEmpty) {
+          return true
+        }
+
         val columnEnvelope =
           queryWindow.getFactory.toGeometry(new Envelope(bbox(0), bbox(2), bbox(1), bbox(3)))
         predicateType match {
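
The guard keeps the pushed-down filter conservative: with no bbox to test
against, evaluate() answers true, so the file is scanned rather than
wrongly pruned (a pruning filter may only skip data it can prove cannot
match). The same rule as a self-contained sketch using JTS directly; the
method name is illustrative, not Sedona API:

    import org.locationtech.jts.geom.Envelope

    // Prune only when the file-level bbox provably misses the query window.
    // GeoParquet bbox order is [minX, minY, maxX, maxY]; JTS Envelope takes
    // (x1, x2, y1, y2), matching the Envelope(bbox(0), bbox(2), ...) call above.
    def mayContainMatches(bbox: Seq[Double], queryWindow: Envelope): Boolean =
      bbox match {
        case Seq(minX, minY, maxX, maxY) =>
          new Envelope(minX, maxX, minY, maxY).intersects(queryWindow)
        case _ => true // no bbox metadata: cannot prune safely, keep the file
      }

    mayContainMatches(Seq.empty, new Envelope(0, 1, 0, 1)) // true: file is kept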
diff --git a/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet
new file mode 100644
index 0000000000..b5393d3309
Binary files /dev/null and b/spark/common/src/test/resources/geoparquet/overture/bbox.geoparquet differ
diff --git a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
index ccfd560c84..86549bf71d 100644
--- a/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
+++ b/spark/spark-3.3/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
@@ -54,6 +54,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
   val legacyparquetdatalocation: String =
     resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
   val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
+  val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"
 
   override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))
 
@@ -732,6 +733,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
     }
   }
 
+  describe("loading one file geoparquet and filtering") {
+    it("should not fail when bbox is not available in geoparquet metadata") {
+      val numberOfRecords = sparkSession.read
+        .format("geoparquet")
+        .load(overtureBBOX)
+        .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
+        .count()
+
+      assert(numberOfRecords == 9)
+    }
+  }
+
   def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
     val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
     parquetFiles.foreach { filePath =>
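
The test (duplicated verbatim for each supported Spark line below) reads the
Overture sample, whose column metadata has no bbox entry, and expects all
nine matching rows back instead of a failure. The same expectation through a
temp view, as a usage sketch; it assumes Sedona's SQL functions are already
registered on the session, as the test base does:

    sparkSession.read.format("geoparquet").load(overtureBBOX)
      .createOrReplaceTempView("bbox_test")
    val n = sparkSession
      .sql("SELECT COUNT(*) FROM bbox_test " +
        "WHERE ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
      .first()
      .getLong(0)
    assert(n == 9)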
diff --git a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
index f5bd8b486e..274394f3bb 100644
--- a/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
+++ b/spark/spark-3.4/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
@@ -54,6 +54,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
   val legacyparquetdatalocation: String =
     resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
   val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
+  val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"
 
   override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))
 
@@ -758,6 +759,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
     }
   }
 
+  describe("loading one file geoparquet and filtering") {
+    it("should not fail when bbox is not available in geoparquet metadata") {
+      val numberOfRecords = sparkSession.read
+        .format("geoparquet")
+        .load(overtureBBOX)
+        .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
+        .count()
+
+      assert(numberOfRecords == 9)
+    }
+  }
+
   def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
     val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
     parquetFiles.foreach { filePath =>
diff --git a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
index a6e74730a0..beca265641 100644
--- a/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
+++ b/spark/spark-3.5/src/test/scala/org/apache/sedona/sql/geoparquetIOTests.scala
@@ -57,7 +57,7 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
   val legacyparquetdatalocation: String =
     resourceFolder + "parquet/legacy-parquet-nested-columns.snappy.parquet"
   val geoparquetoutputlocation: String = resourceFolder + "geoparquet/geoparquet_output/"
-
+  val overtureBBOX: String = resourceFolder + "geoparquet/overture/bbox.geoparquet"
   override def afterAll(): Unit = FileUtils.deleteDirectory(new File(geoparquetoutputlocation))
 
   describe("GeoParquet IO tests") {
@@ -761,6 +761,18 @@ class geoparquetIOTests extends TestBaseScala with BeforeAndAfterAll {
     }
   }
 
+  describe("loading one file geoparquet and filtering") {
+    it("should not fail when bbox is not available in geoparquet metadata") {
+      val numberOfRecords = sparkSession.read
+        .format("geoparquet")
+        .load(overtureBBOX)
+        .where("ST_Intersects(geometry, ST_PolygonFromEnvelope(0, 0, 1, 1))")
+        .count()
+
+      assert(numberOfRecords == 9)
+    }
+  }
+
   def validateGeoParquetMetadata(path: String)(body: org.json4s.JValue => Unit): Unit = {
     val parquetFiles = new File(path).listFiles().filter(_.getName.endsWith(".parquet"))
     parquetFiles.foreach { filePath =>
