paleolimbot commented on code in PR #3222:
URL: https://github.com/apache/parquet-java/pull/3222#discussion_r2101024641
##########
parquet-column/src/main/java/org/apache/parquet/column/statistics/geospatial/BoundingBox.java:
##########
@@ -272,12 +298,33 @@ public void update(Geometry geometry) {
* - X bounds are only updated if both minX and maxX are not NaN
* - Y bounds are only updated if both minY and maxY are not NaN
*
- * This allows partial updates while preserving valid dimensions.
+ * Note: JTS (Java Topology Suite) does not natively support wraparound
envelopes
+ * or geometries that cross the antimeridian (±180° longitude). It operates
strictly
+ * in a 2D Cartesian coordinate space and doesn't account for the Earth's
spherical
+ * nature or longitudinal wrapping.
+ *
+ * When JTS encounters a geometry that crosses the antimeridian, it will
represent
+ * it with an envelope spanning from the westernmost to easternmost points,
often
+ * covering most of the Earth's longitude range (e.g., minX=-180, maxX=180).
+ *
+ * The wraparound check below is defensive but should never be triggered
with standard
+ * JTS geometry operations, as JTS will never produce an envelope with minX
> maxX.
+ *
+ * @throws ShouldNeverHappenException if the update creates an X wraparound
condition
*/
private void updateBounds(double minX, double maxX, double minY, double
maxY) {
if (!Double.isNaN(minX) && !Double.isNaN(maxX)) {
- xMin = Math.min(xMin, minX);
- xMax = Math.max(xMax, maxX);
+ double newXMin = Math.min(xMin, minX);
+ double newXMax = Math.max(xMax, maxX);
+
+ // Check if the update would create a wraparound condition
+ // This should never happen with standard JTS geometry operations
+ if (isWraparound(newXMin, newXMax)) {
+ throw new ShouldNeverHappenException("Wraparound X is not supported by
BoundingBox.update()");
+ }
Review Comment:
Would this check be more clear like:
```java
if (isWraparound(minX, maxX) || isWraparound(xMin, xMax)) {
SomeAppropriateException("Wraparound bounding boxes are not yet supported")
}
```
(i.e., you can't update bounds with wraparound bounds, nor can you update
wraparound bounds with anything?)
##########
parquet-column/src/main/java/org/apache/parquet/column/statistics/geospatial/BoundingBox.java:
##########
@@ -302,6 +349,16 @@ public void reset() {
valid = true;
}
+ /**
+ * The Parquet specification allows X bounds to be "wraparound" to allow for
+ * more compact bounding boxes when a geometry happens to include components
+ * on both sides of the antimeridian (e.g., the nation of Fiji). This
function
+ * checks for that case (see GeoStatistics::lower_bound/upper_bound for more
details).
Review Comment:
```suggestion
* checks for that case.
```
##########
parquet-column/src/test/java/org/apache/parquet/column/statistics/geospatial/TestBoundingBox.java:
##########
@@ -681,4 +681,103 @@ public void testMergingRowGroupBoundingBoxes() {
Assert.assertEquals(900.0, reverseMergeBox.getYMax(), 0.0);
Assert.assertTrue(reverseMergeBox.isValid());
}
+
+ @Test
+ public void testIsXValidAndIsYValid() {
+ // Test with valid X and Y
+ BoundingBox validBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertTrue(validBox.isXValid());
+ Assert.assertTrue(validBox.isYValid());
+
+ // Test with invalid X (NaN)
+ BoundingBox invalidXBox = new BoundingBox(Double.NaN, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(invalidXBox.isXValid());
+ Assert.assertTrue(invalidXBox.isYValid());
+
+ // Test with invalid Y (NaN)
+ BoundingBox invalidYBox = new BoundingBox(1, 2, Double.NaN, 4, 5, 6, 7, 8);
+ Assert.assertTrue(invalidYBox.isXValid());
+ Assert.assertFalse(invalidYBox.isYValid());
+
+ // Test with both X and Y invalid
+ BoundingBox invalidXYBox = new BoundingBox(Double.NaN, Double.NaN,
Double.NaN, Double.NaN, 5, 6, 7, 8);
+ Assert.assertFalse(invalidXYBox.isXValid());
+ Assert.assertFalse(invalidXYBox.isYValid());
+ Assert.assertFalse(invalidXYBox.isXYValid());
+ }
+
+ @Test
+ public void testIsXEmptyAndIsYEmpty() {
+ // Empty bounding box (initial state)
+ BoundingBox emptyBox = new BoundingBox();
+ Assert.assertTrue(emptyBox.isXEmpty());
+ Assert.assertTrue(emptyBox.isYEmpty());
+ Assert.assertTrue(emptyBox.isXYEmpty());
+
+ // Non-empty box
+ BoundingBox nonEmptyBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(nonEmptyBox.isXEmpty());
+ Assert.assertFalse(nonEmptyBox.isYEmpty());
+ Assert.assertFalse(nonEmptyBox.isXYEmpty());
+
+ // Box with empty X dimension only
+ GeometryFactory gf = new GeometryFactory();
+ BoundingBox emptyXBox = new BoundingBox();
+ // Only update Y dimension
+ emptyXBox.update(gf.createPoint(new Coordinate(Double.NaN, 5)));
+ Assert.assertTrue(emptyXBox.isXEmpty());
+ Assert.assertFalse(emptyXBox.isYEmpty());
+ Assert.assertTrue(emptyXBox.isXYEmpty());
+
+ // Box with empty Y dimension only
+ BoundingBox emptyYBox = new BoundingBox();
+ // Only update X dimension
+ emptyYBox.update(gf.createPoint(new Coordinate(10, Double.NaN)));
+ Assert.assertFalse(emptyYBox.isXEmpty());
+ Assert.assertTrue(emptyYBox.isYEmpty());
+ Assert.assertTrue(emptyYBox.isXYEmpty());
+ }
+
+ @Test
+ public void testIsXWraparound() {
+ // Normal bounding box (no wraparound)
+ BoundingBox normalBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(normalBox.isXWraparound());
+
+ // Wraparound box (xMin > xMax)
+ BoundingBox wraparoundBox = new BoundingBox(170, 20, 10, 20, 0, 0, 0, 0);
+ Assert.assertTrue(wraparoundBox.isXWraparound());
+
+ // Edge case: equal bounds
+ BoundingBox equalBoundsBox = new BoundingBox(10, 10, 20, 20, 0, 0, 0, 0);
+ Assert.assertFalse(equalBoundsBox.isXWraparound());
+
+ // Test static method directly
+ Assert.assertTrue(BoundingBox.isWraparound(180, -180));
+ Assert.assertFalse(BoundingBox.isWraparound(-180, 180));
Review Comment:
Should you check `Infinity, -Infinity` or the empty BoundingBox?
##########
parquet-column/src/test/java/org/apache/parquet/column/statistics/geospatial/TestBoundingBox.java:
##########
@@ -681,4 +681,103 @@ public void testMergingRowGroupBoundingBoxes() {
Assert.assertEquals(900.0, reverseMergeBox.getYMax(), 0.0);
Assert.assertTrue(reverseMergeBox.isValid());
}
+
+ @Test
+ public void testIsXValidAndIsYValid() {
+ // Test with valid X and Y
+ BoundingBox validBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertTrue(validBox.isXValid());
+ Assert.assertTrue(validBox.isYValid());
Review Comment:
Is there also an `isZValid()` and/or `isMValid()`? Should `isXYValid()` be
tested here?
##########
parquet-column/src/test/java/org/apache/parquet/column/statistics/geospatial/TestBoundingBox.java:
##########
@@ -681,4 +681,103 @@ public void testMergingRowGroupBoundingBoxes() {
Assert.assertEquals(900.0, reverseMergeBox.getYMax(), 0.0);
Assert.assertTrue(reverseMergeBox.isValid());
}
+
+ @Test
+ public void testIsXValidAndIsYValid() {
+ // Test with valid X and Y
+ BoundingBox validBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertTrue(validBox.isXValid());
+ Assert.assertTrue(validBox.isYValid());
+
+ // Test with invalid X (NaN)
+ BoundingBox invalidXBox = new BoundingBox(Double.NaN, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(invalidXBox.isXValid());
+ Assert.assertTrue(invalidXBox.isYValid());
+
+ // Test with invalid Y (NaN)
+ BoundingBox invalidYBox = new BoundingBox(1, 2, Double.NaN, 4, 5, 6, 7, 8);
+ Assert.assertTrue(invalidYBox.isXValid());
+ Assert.assertFalse(invalidYBox.isYValid());
+
+ // Test with both X and Y invalid
+ BoundingBox invalidXYBox = new BoundingBox(Double.NaN, Double.NaN,
Double.NaN, Double.NaN, 5, 6, 7, 8);
+ Assert.assertFalse(invalidXYBox.isXValid());
+ Assert.assertFalse(invalidXYBox.isYValid());
+ Assert.assertFalse(invalidXYBox.isXYValid());
+ }
+
+ @Test
+ public void testIsXEmptyAndIsYEmpty() {
+ // Empty bounding box (initial state)
+ BoundingBox emptyBox = new BoundingBox();
+ Assert.assertTrue(emptyBox.isXEmpty());
+ Assert.assertTrue(emptyBox.isYEmpty());
+ Assert.assertTrue(emptyBox.isXYEmpty());
+
+ // Non-empty box
+ BoundingBox nonEmptyBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(nonEmptyBox.isXEmpty());
+ Assert.assertFalse(nonEmptyBox.isYEmpty());
+ Assert.assertFalse(nonEmptyBox.isXYEmpty());
+
+ // Box with empty X dimension only
+ GeometryFactory gf = new GeometryFactory();
+ BoundingBox emptyXBox = new BoundingBox();
+ // Only update Y dimension
+ emptyXBox.update(gf.createPoint(new Coordinate(Double.NaN, 5)));
+ Assert.assertTrue(emptyXBox.isXEmpty());
+ Assert.assertFalse(emptyXBox.isYEmpty());
+ Assert.assertTrue(emptyXBox.isXYEmpty());
+
+ // Box with empty Y dimension only
+ BoundingBox emptyYBox = new BoundingBox();
+ // Only update X dimension
+ emptyYBox.update(gf.createPoint(new Coordinate(10, Double.NaN)));
+ Assert.assertFalse(emptyYBox.isXEmpty());
+ Assert.assertTrue(emptyYBox.isYEmpty());
+ Assert.assertTrue(emptyYBox.isXYEmpty());
+ }
+
+ @Test
+ public void testIsXWraparound() {
+ // Normal bounding box (no wraparound)
+ BoundingBox normalBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
+ Assert.assertFalse(normalBox.isXWraparound());
+
+ // Wraparound box (xMin > xMax)
+ BoundingBox wraparoundBox = new BoundingBox(170, 20, 10, 20, 0, 0, 0, 0);
+ Assert.assertTrue(wraparoundBox.isXWraparound());
+
+ // Edge case: equal bounds
+ BoundingBox equalBoundsBox = new BoundingBox(10, 10, 20, 20, 0, 0, 0, 0);
+ Assert.assertFalse(equalBoundsBox.isXWraparound());
+
+ // Test static method directly
+ Assert.assertTrue(BoundingBox.isWraparound(180, -180));
+ Assert.assertFalse(BoundingBox.isWraparound(-180, 180));
+ }
+
+ @Test
+ public void testWraparoundHandlingInMerge() {
+ // Test with two normal boxes
+ BoundingBox box1 = new BoundingBox(10, 20, 10, 20, 0, 0, 0, 0);
+ BoundingBox box2 = new BoundingBox(15, 25, 15, 25, 0, 0, 0, 0);
+ box1.merge(box2);
+
+ Assert.assertTrue(box1.isValid());
+ Assert.assertEquals(10.0, box1.getXMin(), 0.0);
+ Assert.assertEquals(25.0, box1.getXMax(), 0.0);
+
+ // Test with one wraparound box
+ BoundingBox normalBox = new BoundingBox(0, 10, 0, 10, 0, 0, 0, 0);
+ BoundingBox wraparoundBox = new BoundingBox(170, -170, 5, 15, 0, 0, 0, 0);
+
+ normalBox.merge(wraparoundBox);
Review Comment:
Should we also test `wraparoundBox.merge(normalBox)`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]