This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 38b0fc987d [SEDONA-702] Create IndexedGridPartitioner, Support
preserveUncontain… (#1769)
38b0fc987d is described below
commit 38b0fc987dc0758464f5a48996b9f4089afb6d80
Author: James Willis <[email protected]>
AuthorDate: Fri Jan 24 18:00:12 2025 -0800
[SEDONA-702] Create IndexedGridPartitioner, Support preserveUncontain…
(#1769)
* [SEDONA-702] Create IndexedGridPartitioner, Support
preserveUncontainedGeometries in *GridPartitioners
* add docstrings to the GridPartitioner classes
* remove extra covers call
---------
Co-authored-by: jameswillis <[email protected]>
---
.../spatialPartitioning/EqualPartitioning.java | 19 ++++-
.../spatialPartitioning/FlatGridPartitioner.java | 28 ++++++-
.../IndexedGridPartitioner.java | 96 ++++++++++++++++++++++
.../IndexedGridPartitionerTest.java | 96 ++++++++++++++++++++++
4 files changed, 231 insertions(+), 8 deletions(-)
diff --git
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
index 94793fa68a..2d1b95eb75 100644
---
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
+++
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/EqualPartitioning.java
@@ -37,8 +37,19 @@ public class EqualPartitioning implements Serializable {
/** The grids. */
List<Envelope> grids = new ArrayList<Envelope>();
- public EqualPartitioning(List<Envelope> grids) {
+ /**
+ * Whether to discard geometries that do not intersect any grid. If true,
geometries that are not
+ * contained in a grid are placed into the overflow container.
+ */
+ Boolean preserveUncontainedGeometries;
+
+ public EqualPartitioning(List<Envelope> grids, boolean
preserveUncontainedGeometries) {
this.grids = grids;
+ this.preserveUncontainedGeometries = preserveUncontainedGeometries;
+ }
+
+ public EqualPartitioning(List<Envelope> grids) {
+ this(grids, true);
}
/**
* Instantiates a new equal partitioning.
@@ -100,12 +111,12 @@ public class EqualPartitioning implements Serializable {
if (grid.covers(envelope)) {
result.add(new Tuple2(i, geometry));
containFlag = true;
- } else if (grid.intersects(envelope) || envelope.covers(grid)) {
+ } else if (grid.intersects(envelope)) {
result.add(new Tuple2<>(i, geometry));
}
}
- if (!containFlag) {
+ if (!containFlag && preserveUncontainedGeometries) {
result.add(new Tuple2<>(overflowContainerID, geometry));
}
@@ -133,7 +144,7 @@ public class EqualPartitioning implements Serializable {
}
}
- if (!containFlag) {
+ if (!containFlag && preserveUncontainedGeometries) {
result.add(overflowContainerID);
}
return result;
diff --git
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
index e962a965ee..a50ce43f09 100644
---
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
+++
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/FlatGridPartitioner.java
@@ -27,19 +27,39 @@ import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import scala.Tuple2;
+/**
+ * The FlatGridPartitioner is used when there is already a set of grids which
the data should be
+ * partitioned into. It iterates through all the grids to find the grids to
place a geometry into.
+ * Unless you have very few objects to place, it may make more sense to use the
+ * IndexedGridPartitioner. If you do not have a strict requirement to use a
specific set of grids,
+ * it may make more sense to use another partitioner that generates its own
grids from a
+ * space-partitioning tree, e.g. the KDBTreePartitioner or the
QuadTreePartitioner.
+ */
public class FlatGridPartitioner extends SpatialPartitioner {
- public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
+ protected final Boolean preserveUncontainedGeometries;
+
+ public FlatGridPartitioner(
+ GridType gridType, List<Envelope> grids, Boolean
preserveUncontainedGeometries) {
super(gridType, grids);
+ this.preserveUncontainedGeometries = preserveUncontainedGeometries;
+ }
+
+ public FlatGridPartitioner(GridType gridType, List<Envelope> grids) {
+ this(gridType, grids, true);
+ }
+
+ public FlatGridPartitioner(List<Envelope> grids, Boolean
preserveUncontainedGeometries) {
+ this(null, grids, preserveUncontainedGeometries);
}
// For backwards compatibility (see
SpatialRDD.spatialPartitioning(otherGrids))
public FlatGridPartitioner(List<Envelope> grids) {
- super(null, grids);
+ this(null, grids);
}
@Override
public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry
spatialObject) throws Exception {
- EqualPartitioning partitioning = new EqualPartitioning(grids);
+ EqualPartitioning partitioning = new EqualPartitioning(grids,
preserveUncontainedGeometries);
return partitioning.placeObject(spatialObject);
}
@@ -61,7 +81,7 @@ public class FlatGridPartitioner extends SpatialPartitioner {
@Override
public int numPartitions() {
- return grids.size() + 1 /* overflow partition */;
+ return grids.size() + (preserveUncontainedGeometries ? 1 : 0);
}
@Override
diff --git
a/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
new file mode 100644
index 0000000000..79073f2320
--- /dev/null
+++
b/spark/common/src/main/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitioner.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.core.spatialPartitioning;
+
+import java.util.*;
+import org.apache.sedona.core.enums.GridType;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.index.strtree.STRtree;
+import scala.Tuple2;
+
+/**
+ * The IndexedGridPartitioner is used when there is already a set of grids
which the data should be
+ * partitioned into. It leverages an STRTree to quickly find the grids to
place a geometry into. If
+ * you have very few objects to place, it may make more sense to use the
FlatGridPartitioner. If you
+ * do not have a strict requirement to use a specific set of grids, it may
make more sense to use
+ * another partitioner that generates its own grids from space-partitioning
tree, e.g. the
+ * KDBTreePartitioner or the QuadTreePartitioner.
+ */
+public class IndexedGridPartitioner extends FlatGridPartitioner {
+ private final STRtree index;
+
+ public IndexedGridPartitioner(
+ GridType gridType, List<Envelope> grids, Boolean
preserveUncontainedGeometries) {
+ super(gridType, grids, preserveUncontainedGeometries);
+ this.index = new STRtree();
+ for (int i = 0; i < grids.size(); i++) {
+ final Envelope grid = grids.get(i);
+ index.insert(grid, i);
+ }
+ index.build();
+ }
+
+ public IndexedGridPartitioner(GridType gridType, List<Envelope> grids) {
+ this(gridType, grids, false);
+ }
+
+ public IndexedGridPartitioner(List<Envelope> grids, Boolean
preserveUncontainedGeometries) {
+ this(null, grids, preserveUncontainedGeometries);
+ }
+
+ public IndexedGridPartitioner(List<Envelope> grids) {
+ this(null, grids);
+ }
+
+ public STRtree getIndex() {
+ return index;
+ }
+
+ @Override
+ public Iterator<Tuple2<Integer, Geometry>> placeObject(Geometry
spatialObject) throws Exception {
+ List results = index.query(spatialObject.getEnvelopeInternal());
+ if (preserveUncontainedGeometries) {
+ // borrowed from EqualPartitioning.placeObject
+ final int overflowContainerID = grids.size();
+ final Envelope envelope = spatialObject.getEnvelopeInternal();
+
+ Set<Tuple2<Integer, Geometry>> result = new HashSet();
+ boolean containFlag = false;
+ for (Object queryResult : results) {
+ Integer i = (Integer) queryResult;
+ final Envelope grid = grids.get(i);
+ if (grid.covers(envelope)) {
+ result.add(new Tuple2(i, spatialObject));
+ containFlag = true;
+ } else if (grid.intersects(envelope)) {
+ result.add(new Tuple2<>(i, spatialObject));
+ }
+ }
+
+ if (!containFlag) {
+ result.add(new Tuple2<>(overflowContainerID, spatialObject));
+ }
+
+ return result.iterator();
+ } else {
+ return results.stream().map(i -> new Tuple2(i,
spatialObject)).iterator();
+ }
+ }
+}
diff --git
a/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
new file mode 100644
index 0000000000..cedd94eadb
--- /dev/null
+++
b/spark/common/src/test/java/org/apache/sedona/core/spatialPartitioning/IndexedGridPartitionerTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.core.spatialPartitioning;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import junit.framework.TestCase;
+import org.junit.Assert;
+import org.junit.Test;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Envelope;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryFactory;
+import scala.Tuple2;
+
+public class IndexedGridPartitionerTest extends TestCase {
+
+ private List<Envelope> getGrids() {
+ List<Envelope> grids = new ArrayList<>();
+ grids.add(new Envelope(0, 50, 0, 50));
+ grids.add(new Envelope(50, 100, 0, 50));
+ grids.add(new Envelope(0, 50, 50, 100));
+ grids.add(new Envelope(50, 100, 50, 100));
+ return grids;
+ }
+
+ private IndexedGridPartitioner getPartitioner(Boolean
preserveUncontainedGeometries) {
+ return new IndexedGridPartitioner(getGrids(),
preserveUncontainedGeometries);
+ }
+
+ public void testPlaceObjectPreserveContainedGeometries() throws Exception {
+ IndexedGridPartitioner partitioner = getPartitioner(true);
+ GeometryFactory geometryFactory = new GeometryFactory();
+ Geometry spatialObject = geometryFactory.createPoint(new Coordinate(25,
25));
+ Iterator<Tuple2<Integer, Geometry>> result =
partitioner.placeObject(spatialObject);
+
+ List<Tuple2<Integer, Geometry>> resultList = new ArrayList<>();
+ result.forEachRemaining(resultList::add);
+
+ Assert.assertFalse(resultList.isEmpty());
+ Assert.assertEquals(1, resultList.size());
+ Assert.assertEquals(0, (int) resultList.get(0)._1());
+ }
+
+ public void testPlaceObjectDoesntPreserveUncontainedGeometries() throws
Exception {
+ IndexedGridPartitioner partitioner = getPartitioner(false);
+ GeometryFactory geometryFactory = new GeometryFactory();
+ Geometry spatialObject = geometryFactory.createPoint(new Coordinate(-25,
-25));
+ Iterator<Tuple2<Integer, Geometry>> result =
partitioner.placeObject(spatialObject);
+ Assert.assertFalse(result.hasNext());
+ }
+
+ @Test
+ public void testGetGrids() {
+ IndexedGridPartitioner partitioner = getPartitioner(true);
+ Assert.assertEquals(getGrids(), partitioner.getGrids());
+ }
+
+ @Test
+ public void testNumPartitions() {
+ IndexedGridPartitioner partitioner = getPartitioner(true);
+ Assert.assertEquals(5, partitioner.numPartitions());
+
+ partitioner = getPartitioner(false);
+ Assert.assertEquals(4, partitioner.numPartitions());
+ }
+
+ @Test
+ public void testEquals() {
+ IndexedGridPartitioner partitioner = getPartitioner(true);
+ List<Envelope> grids = new ArrayList<>();
+ grids.add(new Envelope(0, 50, 0, 50));
+ grids.add(new Envelope(50, 100, 0, 50));
+ grids.add(new Envelope(0, 50, 50, 100));
+ grids.add(new Envelope(50, 100, 50, 100));
+ IndexedGridPartitioner otherPartitioner = new
IndexedGridPartitioner(grids, true);
+ Assert.assertTrue(partitioner.equals(otherPartitioner));
+ }
+}