This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch fix/1979-envelope-empty in repository https://gitbox.apache.org/repos/asf/sedona.git
commit cf0ccf93a20b176a3b0ed8ec167e0bc2bff69ead Author: Jia Yu <[email protected]> AuthorDate: Sat Feb 7 13:14:42 2026 -0800 [GH-1979] Fix ST_Envelope and ST_Envelope_Aggr empty geometry handling ST_Envelope (scalar): Already correctly returns same-type EMPTY geometry for empty inputs (matching PostGIS). Added test to verify this behavior. ST_Envelope_Aggr (aggregate): Skip empty geometries during accumulation and return null when all inputs are empty, matching PostGIS ST_Extent behavior. Fixed in Spark (AggregateFunctions.scala), Flink (Aggregators.java), and Snowflake (ST_Envelope_Aggr.java, ST_Envelope_Agg.java). Added tests in Spark (aggregateFunctionTestScala: all-empty returns null, mixed empty/non-empty preserves valid envelope), Flink (AggregatorTest: all-empty returns null), and common (FunctionsTest: scalar envelope with empty geometries). Fixes #1979 --- .../java/org/apache/sedona/common/FunctionsTest.java | 19 +++++++++++++++++++ .../apache/sedona/flink/expressions/Aggregators.java | 5 ++++- .../java/org/apache/sedona/flink/AggregatorTest.java | 14 ++++++++++++++ .../snowflake/snowsql/udtfs/ST_Envelope_Agg.java | 6 ++++++ .../snowflake/snowsql/udtfs/ST_Envelope_Aggr.java | 6 ++++++ .../sedona_sql/expressions/AggregateFunctions.scala | 2 +- .../sedona/sql/aggregateFunctionTestScala.scala | 18 ++++++++++++++++++ 7 files changed, 68 insertions(+), 2 deletions(-) diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java index e379e67d6d..5c94dca7c7 100644 --- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java +++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java @@ -808,6 +808,25 @@ public class FunctionsTest extends TestBase { assertEquals(4326, concave.getSRID()); } + @Test + public void envelopeEmptyGeometry() throws ParseException { + // ST_Envelope of EMPTY should return same-type EMPTY (matching PostGIS behavior) + Geometry emptyLineString = Constructors.geomFromWKT("LINESTRING EMPTY", 0); + Geometry result = Functions.envelope(emptyLineString); + assertTrue(result.isEmpty()); + assertEquals("LineString", result.getGeometryType()); + + Geometry emptyPolygon = Constructors.geomFromWKT("POLYGON EMPTY", 0); + result = Functions.envelope(emptyPolygon); + assertTrue(result.isEmpty()); + assertEquals("Polygon", result.getGeometryType()); + + Geometry emptyPoint = Constructors.geomFromWKT("POINT EMPTY", 0); + result = Functions.envelope(emptyPoint); + assertTrue(result.isEmpty()); + assertEquals("Point", result.getGeometryType()); + } + @Test public void envelopeAndCentroidSRID() throws ParseException { Geometry geom = Constructors.geomFromWKT("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", 3857); diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Aggregators.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Aggregators.java index 84cebd6adc..244d5f4b6c 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Aggregators.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Aggregators.java @@ -56,6 +56,7 @@ public class Aggregators { rawSerializer = GeometryTypeSerializer.class, bridgedTo = Geometry.class) public Geometry getValue(Accumulators.Envelope acc) { + if (acc.minX > acc.maxX) return null; return createPolygon(acc.minX, acc.minY, acc.maxX, acc.maxY); } @@ -66,7 +67,9 @@ public class Aggregators { rawSerializer = GeometryTypeSerializer.class, bridgedTo = Geometry.class) Object o) { - Envelope envelope = ((Geometry) o).getEnvelopeInternal(); + Geometry geometry = (Geometry) o; + if (geometry.isEmpty()) return; + Envelope envelope = geometry.getEnvelopeInternal(); acc.minX = Math.min(acc.minX, envelope.getMinX()); acc.minY = Math.min(acc.minY, envelope.getMinY()); acc.maxX = Math.max(acc.maxX, envelope.getMaxX()); diff --git a/flink/src/test/java/org/apache/sedona/flink/AggregatorTest.java b/flink/src/test/java/org/apache/sedona/flink/AggregatorTest.java index 0220ff434d..efed1e0303 100644 --- a/flink/src/test/java/org/apache/sedona/flink/AggregatorTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/AggregatorTest.java @@ -20,6 +20,7 @@ package org.apache.sedona.flink; import static org.apache.flink.table.api.Expressions.*; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import org.apache.flink.table.api.*; import org.apache.flink.types.Row; @@ -46,6 +47,19 @@ public class AggregatorTest extends TestBase { last.getField(0).toString()); } + @Test + public void testEnvelop_Aggr_EmptyGeometries() { + tableEnv.executeSql( + "CREATE TEMPORARY VIEW empty_geom_view AS " + + "SELECT ST_GeomFromWKT(wkt) as geom FROM (" + + "VALUES ('POINT EMPTY'), ('LINESTRING EMPTY'), ('POLYGON EMPTY')" + + ") AS t(wkt)"); + Table result = tableEnv.sqlQuery("SELECT ST_Envelope_Aggr(geom) FROM empty_geom_view"); + Row last = last(result); + assertNull(last.getField(0)); + tableEnv.executeSql("DROP VIEW empty_geom_view"); + } + @Test public void testKNN() { Table pointTable = createPointTable(testDataSize); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Agg.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Agg.java index f02cbe1068..4a5e6bbea9 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Agg.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Agg.java @@ -50,6 +50,9 @@ public class ST_Envelope_Agg { public Stream<OutputRow> process(byte[] geom) throws ParseException { Geometry geometry = GeometrySerde.deserialize(geom); + if (geometry.isEmpty()) { + return Stream.empty(); + } if (buffer == null) { buffer = geometry.getEnvelopeInternal(); } else { @@ -59,6 +62,9 @@ public class ST_Envelope_Agg { } public Stream<OutputRow> endPartition() { + if (buffer == null || buffer.isNull()) { + return Stream.empty(); + } // Returns the value we initialized in the constructor. Polygon poly = geometryFactory.createPolygon( diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Aggr.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Aggr.java index 10aa2d5afa..b1d05e437c 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Aggr.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_Envelope_Aggr.java @@ -50,6 +50,9 @@ public class ST_Envelope_Aggr { public Stream<OutputRow> process(byte[] geom) throws ParseException { Geometry geometry = GeometrySerde.deserialize(geom); + if (geometry.isEmpty()) { + return Stream.empty(); + } if (buffer == null) { buffer = geometry.getEnvelopeInternal(); } else { @@ -59,6 +62,9 @@ public class ST_Envelope_Aggr { } public Stream<OutputRow> endPartition() { + if (buffer == null || buffer.isNull()) { + return Stream.empty(); + } // Returns the value we initialized in the constructor. Polygon poly = geometryFactory.createPolygon( diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala index ca169a2598..608ad5c141 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala @@ -130,7 +130,7 @@ private[apache] class ST_Envelope_Aggr val serde = ExpressionEncoder[Geometry]() def reduce(buffer: Option[EnvelopeBuffer], input: Geometry): Option[EnvelopeBuffer] = { - if (input == null) return buffer + if (input == null || input.isEmpty) return buffer val env = input.getEnvelopeInternal val envBuffer = EnvelopeBuffer(env.getMinX, env.getMaxX, env.getMinY, env.getMaxY) buffer match { diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala index cd9991b657..f38f87f07f 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/aggregateFunctionTestScala.scala @@ -50,6 +50,24 @@ class aggregateFunctionTestScala extends TestBaseScala { assert(boundary.take(1)(0).get(0) == geometryFactory.createPolygon(coordinates)) } + it("Passed ST_Envelope_aggr with empty geometries returns null") { + val emptyDf = sparkSession.sql( + "SELECT ST_GeomFromWKT(wkt) as geom FROM VALUES ('POINT EMPTY'), ('LINESTRING EMPTY'), ('POLYGON EMPTY') AS t(wkt)") + emptyDf.createOrReplaceTempView("emptydf") + val result = sparkSession.sql("SELECT ST_Envelope_Aggr(emptydf.geom) FROM emptydf") + assert(result.take(1)(0).get(0) == null) + } + + it("Passed ST_Envelope_aggr with mixed empty and non-empty geometries") { + val mixedDf = sparkSession.sql( + "SELECT ST_GeomFromWKT(wkt) as geom FROM VALUES ('POINT EMPTY'), ('POINT (1 2)'), ('POINT (3 4)') AS t(wkt)") + mixedDf.createOrReplaceTempView("mixeddf") + val result = sparkSession.sql("SELECT ST_Envelope_Aggr(mixeddf.geom) FROM mixeddf") + val envelope = result.take(1)(0).get(0).asInstanceOf[Geometry] + assert(envelope != null) + assert(!envelope.isEmpty) + } + it("Passed ST_Union_aggr") { var polygonCsvDf = sparkSession.read
