This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 38b0fc987d [SEDONA-702] Create IndexedGridPartitioner, Support 
preserveUncontain… (#1769)
38b0fc987d is described below

commit 38b0fc987dc0758464f5a48996b9f4089afb6d80
Author: James Willis <[email protected]>
AuthorDate: Fri Jan 24 18:00:12 2025 -0800

    [SEDONA-702] Create IndexedGridPartitioner, Support preserveUncontain… 
(#1769)
    
    * [SEDONA-702] Create IndexedGridPartitioner, Support 
preserveUncontainedGeometries in *GridPartitioners
    
    * add docstrings to the GridPartitioner classes
    
    * remove extra covers call
    
    ---------
    
    Co-authored-by: jameswillis <[email protected]>
---
 .../spatialPartitioning/EqualPartitioning.java     | 19 ++++-
 .../spatialPartitioning/FlatGridPartitioner.java   | 28 ++++++-
 .../IndexedGridPartitioner.java                    | 96 ++++++++++++++++++++++
 .../IndexedGridPartitionerTest.java                | 96 ++++++++++++++++++++++
 4 files changed, 231 insertions(+), 8 deletions(-)

diff --git 
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
index 94793fa68a..2d1b95eb75 100644
--- 
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
+++ 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
@@ -37,8 +37,19 @@ public class EqualPartitioning implements Serializable {
   /** The grids. */
   List<Envelope> grids = new ArrayList<Envelope>();
 
-  public EqualPartitioning(List<Envelope> grids) {
+  /**
+   * Whether to discard geometries that do not intersect any grid. If true, 
geometries that are not
+   * contained in a grid are placed into the overflow container.
+   */
+  Boolean preserveUncontainedGeometries;
+
+  public EqualPartitioning(List<Envelope> grids, boolean 
preserveUncontainedGeometries) {
     this.grids = grids;
+    this.preserveUncontainedGeometries = preserveUncontainedGeometries;
+  }
+
+  public EqualPartitioning(List<Envelope> grids) {
+    this(grids, true);
   }
   /**
    * Instantiates a new equal partitioning.
@@ -100,12 +111,12 @@ public class EqualPartitioning implements Serializable {
       if (grid.covers(envelope)) {
         result.add(new Tuple2(i, geometry));
         containFlag = true;
-      } else if (grid.intersects(envelope) || envelope.covers(grid)) {
+      } else if (grid.intersects(envelope)) {
         result.add(new Tuple2<>(i, geometry));
       }
     }
 
-    if (!containFlag) {
+    if (!containFlag && preserveUncontainedGeometries) {
       result.add(new Tuple2<>(overflowContainerID, geometry));
     }
 
@@ -133,7 +144,7 @@ public class EqualPartitioning implements Serializable {
       }
     }
 
-    if (!containFlag) {
+    if (!containFlag && preserveUncontainedGeometries) {
       result.add(overflowContainerID);
     }
     return result;
diff --git 
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
index e962a965ee..a50ce43f09 100644
--- 
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
+++ 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
@@ -27,19 +27,39 @@ import org.locationtech.jts.geom.Envelope;
 import org.locationtech.jts.geom.Geometry;
 import scala.Tuple2;
 
+/**
+ * The FlatGridPartitioner is used when there is already a set of grids which 
the data should be
+ * partitioned into. It iterates through all the grids to find the grids to 
place a geometry into.
+ * Unless you have very few objects to place, it may make more sense to use the
+ * IndexedGridPartitioner. If you do not have a strict requirement to use a 
specific set of grids,
+ * it may make more sense to use another partitioner that generates its own 
grids from a
+ * space-partitioning tree, e.g. the KDBTreePartitioner or the 
QuadTreePartitioner.
+ */
 public class FlatGridPartitioner extends SpatialPartitioner {
-  public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
+  protected final Boolean preserveUncontainedGeometries;
+
+  public FlatGridPartitioner(
+      GridType gridType, List<Envelope> grids, Boolean 
preserveUncontainedGeometries) {
     super(gridType, grids);
+    this.preserveUncontainedGeometries = preserveUncontainedGeometries;
+  }
+
+  public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
+    this(gridType, grids, true);
+  }
+
+  public FlatGridPartitioner(List<Envelope> grids, Boolean 
preserveUncontainedGeometries) {
+    this(null, grids, preserveUncontainedGeometries);
   }
 
   // For backwards compatibility (see 
SpatialRDD.spatialPartitioning(otherGrids))
   public FlatGridPartitioner(List<Envelope> grids) {
-    super(null, grids);
+    this(null, grids);
   }
 
   @Override
   public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry 
spatialObject) throws Exception {
-    EqualPartitioning partitioning = new EqualPartitioning(grids);
+    EqualPartitioning partitioning = new EqualPartitioning(grids, 
preserveUncontainedGeometries);
     return partitioning.placeObject(spatialObject);
   }
 
@@ -61,7 +81,7 @@ public class FlatGridPartitioner extends SpatialPartitioner {
 
   @Override
   public int numPartitions() {
-    return grids.size() + 1 /* overflow partition */;
+    return grids.size() + (preserveUncontainedGeometries ? 1 : 0);
   }
 
   @Override
diff --git 
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
new file mode 100644
index 0000000000..79073f2320
--- /dev/null
+++ 
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.core.spatialPartitioning;
+
+import java.util.*;
+import org.apache.sedona.core.enums.GridType;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.index.strtree.STRtree;
+import scala.Tuple2;
+
+/**
+ * The IndexedGridPartitioner is used when there is already a set of grids 
which the data should be
+ * partitioned into. It leverages an STRTree to quickly find the grids to 
place a geometry into. If
+ * you have very few objects to place, it may make more sense to use the 
FlatGridPartitioner. If you
+ * do not have a strict requirement to use a specific set of grids, it may 
make more sense to use
+ * another partitioner that generates its own grids from space-partitioning 
tree, e.g. the
+ * KDBTreePartitioner or the QuadTreePartitioner.
+ */
+public class IndexedGridPartitioner extends FlatGridPartitioner {
+  private final STRtree index;
+
+  public IndexedGridPartitioner(
+      GridType gridType, List<Envelope> grids, Boolean 
preserveUncontainedGeometries) {
+    super(gridType, grids, preserveUncontainedGeometries);
+    this.index = new STRtree();
+    for (int i = 0; i < grids.size(); i++) {
+      final Envelope grid = grids.get(i);
+      index.insert(grid, i);
+    }
+    index.build();
+  }
+
+  public IndexedGridPartitioner(GridType gridType, List<Envelope> grids) {
+    this(gridType, grids, false);
+  }
+
+  public IndexedGridPartitioner(List<Envelope> grids, Boolean 
preserveUncontainedGeometries) {
+    this(null, grids, preserveUncontainedGeometries);
+  }
+
+  public IndexedGridPartitioner(List<Envelope> grids) {
+    this(null, grids);
+  }
+
+  public STRtree getIndex() {
+    return index;
+  }
+
+  @Override
+  public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry 
spatialObject) throws Exception {
+    List results = index.query(spatialObject.getEnvelopeInternal());
+    if (preserveUncontainedGeometries) {
+      // borrowed from EqualPartitioning.placeObject
+      final int overflowContainerID = grids.size();
+      final Envelope envelope = spatialObject.getEnvelopeInternal();
+
+      Set<Tuple2<Integer, Geometry>> result = new HashSet();
+      boolean containFlag = false;
+      for (Object queryResult : results) {
+        Integer i = (Integer) queryResult;
+        final Envelope grid = grids.get(i);
+        if (grid.covers(envelope)) {
+          result.add(new Tuple2(i, spatialObject));
+          containFlag = true;
+        } else if (grid.intersects(envelope)) {
+          result.add(new Tuple2<>(i, spatialObject));
+        }
+      }
+
+      if (!containFlag) {
+        result.add(new Tuple2<>(overflowContainerID, spatialObject));
+      }
+
+      return result.iterator();
+    } else {
+      return results.stream().map(i -> new Tuple2(i, 
spatialObject)).iterator();
+    }
+  }
+}
diff --git 
a/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
 
b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
new file mode 100644
index 0000000000..cedd94eadb
--- /dev/null
+++ 
b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.core.spatialPartitioning;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import junit.framework.TestCase;
+import org.junit.Assert;
+import org.junit.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+import scala.Tuple2;
+
+public class IndexedGridPartitionerTest extends TestCase {
+
+  private List<Envelope> getGrids() {
+    List<Envelope> grids = new ArrayList<>();
+    grids.add(new Envelope(0, 50, 0, 50));
+    grids.add(new Envelope(50, 100, 0, 50));
+    grids.add(new Envelope(0, 50, 50, 100));
+    grids.add(new Envelope(50, 100, 50, 100));
+    return grids;
+  }
+
+  private IndexedGridPartitioner getPartitioner(Boolean 
preserveUncontainedGeometries) {
+    return new IndexedGridPartitioner(getGrids(), 
preserveUncontainedGeometries);
+  }
+
+  public void testPlaceObjectPreserveContainedGeometries() throws Exception {
+    IndexedGridPartitioner partitioner = getPartitioner(true);
+    GeometryFactory geometryFactory = new GeometryFactory();
+    Geometry spatialObject = geometryFactory.createPoint(new Coordinate(25, 
25));
+    Iterator<Tuple2<Integer, Geometry>> result = 
partitioner.placeObject(spatialObject);
+
+    List<Tuple2<Integer, Geometry>> resultList = new ArrayList<>();
+    result.forEachRemaining(resultList::add);
+
+    Assert.assertFalse(resultList.isEmpty());
+    Assert.assertEquals(1, resultList.size());
+    Assert.assertEquals(0, (int) resultList.get(0)._1());
+  }
+
+  public void testPlaceObjectDoesntPreserveUncontainedGeometries() throws 
Exception {
+    IndexedGridPartitioner partitioner = getPartitioner(false);
+    GeometryFactory geometryFactory = new GeometryFactory();
+    Geometry spatialObject = geometryFactory.createPoint(new Coordinate(-25, 
-25));
+    Iterator<Tuple2<Integer, Geometry>> result = 
partitioner.placeObject(spatialObject);
+    Assert.assertFalse(result.hasNext());
+  }
+
+  @Test
+  public void testGetGrids() {
+    IndexedGridPartitioner partitioner = getPartitioner(true);
+    Assert.assertEquals(getGrids(), partitioner.getGrids());
+  }
+
+  @Test
+  public void testNumPartitions() {
+    IndexedGridPartitioner partitioner = getPartitioner(true);
+    Assert.assertEquals(5, partitioner.numPartitions());
+
+    partitioner = getPartitioner(false);
+    Assert.assertEquals(4, partitioner.numPartitions());
+  }
+
+  @Test
+  public void testEquals() {
+    IndexedGridPartitioner partitioner = getPartitioner(true);
+    List<Envelope> grids = new ArrayList<>();
+    grids.add(new Envelope(0, 50, 0, 50));
+    grids.add(new Envelope(50, 100, 0, 50));
+    grids.add(new Envelope(0, 50, 50, 100));
+    grids.add(new Envelope(50, 100, 50, 100));
+    IndexedGridPartitioner otherPartitioner = new 
IndexedGridPartitioner(grids, true);
+    Assert.assertTrue(partitioner.equals(otherPartitioner));
+  }
+}

Reply via email to