This is an automated email from the ASF dual-hosted git repository.
petern pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new c25a377f perf: Implement method to directly write GeosGeometry into
buffer and use on st_unaryunion + st_buffer (#476)
c25a377f is described below
commit c25a377fcf1dc3471c5fe6ecfabc0fc88232b9e3
Author: Peter Nguyen <[email protected]>
AuthorDate: Mon Jan 5 11:11:26 2026 -0800
perf: Implement method to directly write GeosGeometry into buffer and use
on st_unaryunion + st_buffer (#476)
---
Cargo.lock | 1 +
Cargo.toml | 3 +-
c/sedona-geos/Cargo.toml | 1 +
c/sedona-geos/src/geos_to_wkb.rs | 516 ++++++++++++++++++++++
c/sedona-geos/src/lib.rs | 1 +
c/sedona-geos/src/st_buffer.rs | 7 +-
c/sedona-geos/src/st_unaryunion.rs | 13 +-
python/sedonadb/tests/functions/test_functions.py | 59 +++
8 files changed, 588 insertions(+), 13 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index f946c71c..33a326eb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5227,6 +5227,7 @@ version = "0.3.0"
dependencies = [
"arrow-array",
"arrow-schema",
+ "bytemuck",
"byteorder",
"criterion",
"datafusion-common",
diff --git a/Cargo.toml b/Cargo.toml
index 7a0d999c..d272d46e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,6 +71,7 @@ arrow-json = { version = "57.0.0" }
arrow-schema = { version = "57.0.0" }
arrow-buffer = { version = "57.0.0" }
async-trait = { version = "0.1.87" }
+bytemuck = "1.14"
bytes = "1.10"
byteorder = "1"
chrono = { version = "0.4.41", default-features = false }
@@ -98,7 +99,7 @@ mimalloc = { version = "0.1", default-features = false }
libmimalloc-sys = { version = "0.1", default-features = false }
once_cell = "1.20"
-geos = { git="https://github.com/georust/geos.git",
rev="47afbad2483e489911ddb456417808340e9342c3", features = ["geo", "v3_11_0"] }
+geos = { git="https://github.com/georust/geos.git",
rev="47afbad2483e489911ddb456417808340e9342c3", features = ["geo", "v3_12_0"] }
geo-types = "0.7.17"
geo-traits = "0.3.0"
diff --git a/c/sedona-geos/Cargo.toml b/c/sedona-geos/Cargo.toml
index ce85b023..1549d7db 100644
--- a/c/sedona-geos/Cargo.toml
+++ b/c/sedona-geos/Cargo.toml
@@ -38,6 +38,7 @@ geo-types = { workspace = true }
[dependencies]
arrow-schema = { workspace = true }
arrow-array = { workspace = true }
+bytemuck = { workspace = true }
datafusion-common = { workspace = true }
datafusion-expr = { workspace = true }
geos = { workspace = true }
diff --git a/c/sedona-geos/src/geos_to_wkb.rs b/c/sedona-geos/src/geos_to_wkb.rs
new file mode 100644
index 00000000..f60fd897
--- /dev/null
+++ b/c/sedona-geos/src/geos_to_wkb.rs
@@ -0,0 +1,516 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::io::Write;
+
+use byteorder::{LittleEndian, WriteBytesExt};
+use datafusion_common::{error::Result, DataFusionError};
+use geo_traits::Dimensions;
+use geos::{Geom, Geometry, GeometryTypes};
+use sedona_geometry::wkb_factory::{
+ write_wkb_geometrycollection_header, write_wkb_linestring_header,
+ write_wkb_multilinestring_header, write_wkb_multipoint_header,
write_wkb_multipolygon_header,
+ write_wkb_point_header, write_wkb_polygon_header,
+};
+
+/// Write a GEOS geometry to WKB format.
+///
+/// This is a fast, custom implementation that directly extracts coordinates
+/// from GEOS geometries and writes them in WKB format into a buffer.
+///
+/// This is the public entry point of the module; it delegates to
+/// `write_geometry`, which dispatches on the GEOS geometry type.
+///
+/// # Errors
+///
+/// Returns an error if GEOS fails to report a property of `geom`, if the
+/// geometry type is not one of the supported types, or if writing to
+/// `writer` fails.
+pub fn write_geos_geometry(geom: &Geometry, writer: &mut impl Write) ->
Result<()> {
+ write_geometry(geom, writer)
+}
+
+/// Writes `geom` to `writer` as WKB, dispatching on its GEOS geometry type.
+///
+/// The coordinate dimension (XY, XYZ, XYM or XYZM) is derived from the
+/// geometry's `has_z` / `has_m` flags and handed to the type-specific writer.
+///
+/// # Errors
+///
+/// Returns `DataFusionError::Execution` if GEOS cannot report the geometry
+/// type or the Z/M flags, or if the geometry type is not one of the seven
+/// types matched below. Errors from the type-specific writers are propagated.
+fn write_geometry(geom: &impl Geom, writer: &mut impl Write) -> Result<()> {
+ let geom_type = geom
+ .geometry_type()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get
geometry type: {e}")))?;
+
+ let has_z = geom
+ .has_z()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to check
has_z: {e}")))?;
+ let has_m = geom
+ .has_m()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to check
has_m: {e}")))?;
+
+ // Map the (Z, M) flag pair onto the dimension passed to the header and
+ // coordinate writers.
+ let dim = match (has_z, has_m) {
+ (false, false) => Dimensions::Xy,
+ (true, false) => Dimensions::Xyz,
+ (false, true) => Dimensions::Xym,
+ (true, true) => Dimensions::Xyzm,
+ };
+ match geom_type {
+ GeometryTypes::Point => write_point(geom, dim, writer),
+ GeometryTypes::LineString => write_line_string(geom, dim, writer),
+ GeometryTypes::Polygon => write_polygon(geom, dim, writer),
+ GeometryTypes::MultiPoint => write_multi_point(geom, dim, writer),
+ GeometryTypes::MultiLineString => write_multi_line_string(geom, dim,
writer),
+ GeometryTypes::MultiPolygon => write_multi_polygon(geom, dim, writer),
+ GeometryTypes::GeometryCollection => write_geometry_collection(geom,
dim, writer),
+ // Every other GEOS geometry type is rejected rather than written.
+ _ => Err(DataFusionError::Execution(format!(
+ "Unsupported geometry type: {geom_type:?}"
+ ))),
+ }
+}
+
+/// Writes a single point: the WKB point header followed by its coordinates.
+///
+/// An empty point is written as NaN ordinates (x and y, plus z and/or m when
+/// `dim` includes them); otherwise the point's GEOS coordinate sequence is
+/// written with `write_coord_seq`.
+///
+/// # Errors
+///
+/// Returns an error if the header cannot be written, if GEOS cannot report
+/// emptiness or the coordinate sequence, or if writing to `writer` fails.
+fn write_point(geom: &impl Geom, dim: Dimensions, writer: &mut impl Write) ->
Result<()> {
+ write_wkb_point_header(writer, dim)
+ .map_err(|e| DataFusionError::Execution(format!("Failed to write point
header: {e}")))?;
+
+ let is_empty = geom
+ .is_empty()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to check if
empty: {e}")))?;
+
+ if is_empty {
+ // Write NaN coordinates for empty point
+ writer.write_f64::<LittleEndian>(f64::NAN)?; // x
+ writer.write_f64::<LittleEndian>(f64::NAN)?; // y
+ if matches!(dim, Dimensions::Xyz | Dimensions::Xyzm) {
+ writer.write_f64::<LittleEndian>(f64::NAN)?; // z
+ }
+ if matches!(dim, Dimensions::Xym | Dimensions::Xyzm) {
+ writer.write_f64::<LittleEndian>(f64::NAN)?; // m
+ }
+ } else {
+ // Non-empty: the coordinate sequence holds the point's single coordinate.
+ let coord_seq = geom
+ .get_coord_seq()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get
coord seq: {e}")))?;
+
+ write_coord_seq(&coord_seq, dim, writer)?;
+ }
+
+ Ok(())
+}
+
+/// Writes a linestring: the WKB linestring header (which receives the point
+/// count) followed by the coordinates of every point.
+///
+/// When the linestring has no points only the header is written.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report the point count or the coordinate
+/// sequence, if the header cannot be written, or if writing to `writer` fails.
+fn write_line_string(geom: &impl Geom, dim: Dimensions, writer: &mut impl
Write) -> Result<()> {
+ let num_points = geom
+ .get_num_points()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get num
points: {e}")))?;
+
+ write_wkb_linestring_header(writer, dim, num_points).map_err(|e| {
+ DataFusionError::Execution(format!("Failed to write linestring header:
{e}"))
+ })?;
+
+ // The coordinate sequence is only fetched when there is something to write.
+ if num_points > 0 {
+ let coord_seq = geom
+ .get_coord_seq()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get
coord seq: {e}")))?;
+
+ write_coord_seq(&coord_seq, dim, writer)?;
+ }
+
+ Ok(())
+}
+
+/// Converts a ring's point count to the `u32` that is written in front of the
+/// ring's coordinates, failing instead of silently truncating when the count
+/// does not fit. `ring` names the ring kind for the error message.
+fn ring_point_count(size: usize, ring: &str) -> Result<u32> {
+    u32::try_from(size).map_err(|_| {
+        DataFusionError::Execution(format!(
+            "Too many points in {ring} ring to encode as WKB: {size}"
+        ))
+    })
+}
+
+/// Writes a polygon: the WKB polygon header (which receives the ring count),
+/// then the exterior ring, then each interior ring. Every ring is written as a
+/// little-endian `u32` point count followed by its coordinates.
+///
+/// An empty polygon is written with a ring count of zero and no rings.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report emptiness, the ring count, a ring or
+/// its coordinate sequence, if a ring's point count does not fit in a `u32`,
+/// if the header cannot be written, or if writing to `writer` fails.
+fn write_polygon(geom: &impl Geom, dim: Dimensions, writer: &mut impl Write) -> Result<()> {
+    let is_empty = geom
+        .is_empty()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to check if empty: {e}")))?;
+
+    let num_interior_rings = geom.get_num_interior_rings().map_err(|e| {
+        DataFusionError::Execution(format!("Failed to get num interior rings: {e}"))
+    })?;
+
+    // A non-empty polygon has one exterior ring plus its interior rings.
+    let num_rings = if is_empty { 0 } else { num_interior_rings + 1 };
+
+    write_wkb_polygon_header(writer, dim, num_rings)
+        .map_err(|e| DataFusionError::Execution(format!("Failed to write polygon header: {e}")))?;
+
+    if num_rings == 0 {
+        return Ok(());
+    }
+
+    let exterior = geom
+        .get_exterior_ring()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to get exterior ring: {e}")))?;
+
+    let exterior_coord_seq = exterior.get_coord_seq().map_err(|e| {
+        DataFusionError::Execution(format!("Failed to get exterior coord seq: {e}"))
+    })?;
+
+    let exterior_size = exterior_coord_seq
+        .size()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to get exterior size: {e}")))?;
+
+    // Number of points in exterior ring
+    writer.write_u32::<LittleEndian>(ring_point_count(exterior_size, "exterior")?)?;
+    write_coord_seq(&exterior_coord_seq, dim, writer)?;
+
+    // Write interior rings
+    for i in 0..num_interior_rings {
+        let interior = geom.get_interior_ring_n(i).map_err(|e| {
+            DataFusionError::Execution(format!("Failed to get interior ring {i}: {e}"))
+        })?;
+
+        let interior_coord_seq = interior.get_coord_seq().map_err(|e| {
+            DataFusionError::Execution(format!("Failed to get interior coord seq: {e}"))
+        })?;
+
+        let interior_size = interior_coord_seq.size().map_err(|e| {
+            DataFusionError::Execution(format!("Failed to get interior size: {e}"))
+        })?;
+
+        writer.write_u32::<LittleEndian>(ring_point_count(interior_size, "interior")?)?;
+        write_coord_seq(&interior_coord_seq, dim, writer)?;
+    }
+
+    Ok(())
+}
+
+/// Writes a multipoint: the WKB multipoint header (which receives the member
+/// count) followed by each member point written with `write_point`.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report the member count or a member, or if
+/// the header cannot be written. Errors from `write_point` are propagated
+/// unchanged so that their original variant (for example an I/O error from
+/// `writer`) is preserved instead of being flattened into a message string.
+fn write_multi_point(geom: &impl Geom, dim: Dimensions, writer: &mut impl Write) -> Result<()> {
+    let num_points = geom
+        .get_num_geometries()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to get num geometries: {e}")))?;
+
+    write_wkb_multipoint_header(writer, dim, num_points).map_err(|e| {
+        DataFusionError::Execution(format!("Failed to write multipoint header: {e}"))
+    })?;
+
+    for i in 0..num_points {
+        let point = geom
+            .get_geometry_n(i)
+            .map_err(|e| DataFusionError::Execution(format!("Failed to get point {i}: {e}")))?;
+
+        // `write_point` already returns a descriptive `DataFusionError`.
+        write_point(&point, dim, writer)?;
+    }
+
+    Ok(())
+}
+
+/// Writes a multilinestring: the WKB multilinestring header (which receives
+/// the member count) followed by each member written with `write_line_string`.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report the member count or a member, or if
+/// the header cannot be written. Errors from `write_line_string` are
+/// propagated unchanged so that their original variant (for example an I/O
+/// error from `writer`) is preserved instead of being flattened into a
+/// message string.
+fn write_multi_line_string(
+    geom: &impl Geom,
+    dim: Dimensions,
+    writer: &mut impl Write,
+) -> Result<()> {
+    let num_line_strings = geom
+        .get_num_geometries()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to get num geometries: {e}")))?;
+
+    write_wkb_multilinestring_header(writer, dim, num_line_strings).map_err(|e| {
+        DataFusionError::Execution(format!("Failed to write multilinestring header: {e}"))
+    })?;
+
+    for i in 0..num_line_strings {
+        let line_string = geom.get_geometry_n(i).map_err(|e| {
+            DataFusionError::Execution(format!("Failed to get line string {i}: {e}"))
+        })?;
+
+        // `write_line_string` already returns a descriptive `DataFusionError`.
+        write_line_string(&line_string, dim, writer)?;
+    }
+
+    Ok(())
+}
+
+/// Writes a multipolygon: the WKB multipolygon header (which receives the
+/// member count) followed by each member polygon written with `write_polygon`.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report the member count or a member, or if
+/// the header cannot be written. Errors from `write_polygon` are propagated
+/// unchanged.
+fn write_multi_polygon(geom: &impl Geom, dim: Dimensions, writer: &mut impl
Write) -> Result<()> {
+ let num_polygons = geom
+ .get_num_geometries()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get num
geometries: {e}")))?;
+
+ write_wkb_multipolygon_header(writer, dim, num_polygons).map_err(|e| {
+ DataFusionError::Execution(format!("Failed to write multipolygon
header: {e}"))
+ })?;
+
+ for i in 0..num_polygons {
+ let poly = geom
+ .get_geometry_n(i)
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get
polygon {i}: {e}")))?;
+
+ write_polygon(&poly, dim, writer)?;
+ }
+ Ok(())
+}
+
+/// Writes a geometry collection: the WKB collection header (which receives the
+/// member count) followed by each member geometry.
+///
+/// `dim` is used only for the collection header. Each member is written through
+/// `write_geometry`, which derives that member's dimension from its own Z/M
+/// flags; this also makes nested collections recurse.
+///
+/// # Errors
+///
+/// Returns an error if GEOS cannot report the member count or a member, or if
+/// the header cannot be written. Errors from `write_geometry` are propagated
+/// unchanged.
+fn write_geometry_collection(
+ geom: &impl Geom,
+ dim: Dimensions,
+ writer: &mut impl Write,
+) -> Result<()> {
+ let num_geometries = geom
+ .get_num_geometries()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get num
geometries: {e}")))?;
+
+ write_wkb_geometrycollection_header(writer, dim,
num_geometries).map_err(|e| {
+ DataFusionError::Execution(format!("Failed to write geometry
collection header: {e}"))
+ })?;
+
+ for i in 0..num_geometries {
+ let sub_geom = geom
+ .get_geometry_n(i)
+ .map_err(|e| DataFusionError::Execution(format!("Failed to get
geometry {i}: {e}")))?;
+
+ // Members may be of any supported type, so go back through the dispatcher.
+ write_geometry(&sub_geom, writer)?;
+ }
+
+ Ok(())
+}
+
+/// Writes every ordinate of `coord_seq` to `writer` as consecutive
+/// little-endian `f64` values, using `dim.size()` ordinates per coordinate.
+///
+/// The rest of this module writes ring counts and empty-point NaN values as
+/// little-endian, so the coordinate bytes must be little-endian as well. On
+/// little-endian targets the buffer returned by GEOS already has that layout
+/// and is written with a single `write_all`; on big-endian targets each value
+/// is converted individually so the output does not depend on the host byte
+/// order.
+///
+/// # Errors
+///
+/// Returns `DataFusionError::Execution` if GEOS cannot produce the coordinate
+/// buffer, or an I/O error if writing to `writer` fails.
+fn write_coord_seq(
+    coord_seq: &geos::CoordSeq,
+    dim: Dimensions,
+    writer: &mut impl Write,
+) -> Result<()> {
+    let coords = coord_seq
+        .as_buffer(Some(dim.size()))
+        .map_err(|e| DataFusionError::Execution(format!("Failed to get coord seq buffer: {e}")))?;
+
+    if cfg!(target_endian = "little") {
+        // Cast Vec<f64> to &[u8] so we can write the bytes directly to the writer buffer
+        let byte_slice: &[u8] = bytemuck::cast_slice(&coords);
+        writer.write_all(byte_slice)?;
+    } else {
+        // Native byte order differs from the WKB byte order: convert per value.
+        for &ordinate in coords.iter() {
+            writer.write_f64::<LittleEndian>(ordinate)?;
+        }
+    }
+
+    Ok(())
+}
+
+// Round-trip tests for `write_geos_geometry`, grouped by geometry type.
+// The cases below cover XY, XYZ, XYZM and EMPTY inputs; there are no
+// M-only (XYM) cases in this module.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Helper function to test WKB round-trip: create geometry from WKT,
write to WKB, read back, verify
+ ///
+ /// Panics (via `unwrap` / `assert_eq!`) if any step fails or if the WKT of
+ /// the re-read geometry differs from the WKT of the parsed input.
+ fn test_wkb_round_trip(wkt: &str) {
+ let geos_geom = geos::Geometry::new_from_wkt(wkt).unwrap();
+ // The expected text is GEOS' own rendering of the parsed input, not the
+ // literal `wkt` argument.
+ let expected_wkt = geos_geom.to_wkt().unwrap();
+
+ // Write to WKB from Geos object using our method
+ let mut wkb_buf = Vec::new();
+ write_geos_geometry(&geos_geom, &mut wkb_buf).unwrap();
+ let geos_from_wkb = geos::Geometry::new_from_wkb(&wkb_buf).unwrap();
+
+ // Compare them as WKT
+ let geos_from_wkb_wkt = geos_from_wkb.to_wkt().unwrap();
+ assert_eq!(geos_from_wkb_wkt, expected_wkt);
+ }
+
+ // Point tests
+ #[test]
+ fn test_write_point_xy() {
+ test_wkb_round_trip("POINT (0 1)");
+ test_wkb_round_trip("POINT (1.5 2.5)");
+ test_wkb_round_trip("POINT (-10.5 -20.5)");
+ }
+
+ #[test]
+ fn test_write_point_xyz() {
+ test_wkb_round_trip("POINT Z (0 1 10)");
+ test_wkb_round_trip("POINT Z (1.5 2.5 3.5)");
+ test_wkb_round_trip("POINT Z (-10.5 -20.5 -30.5)");
+ }
+
+ #[test]
+ fn test_write_point_xyzm() {
+ test_wkb_round_trip("POINT ZM (0 1 10 100)");
+ test_wkb_round_trip("POINT ZM (1.5 2.5 3.5 4.5)");
+ test_wkb_round_trip("POINT ZM (-10.5 -20.5 -30.5 -40.5)");
+ }
+
+ #[test]
+ fn test_write_point_empty() {
+ test_wkb_round_trip("POINT EMPTY");
+ test_wkb_round_trip("POINT Z EMPTY");
+ test_wkb_round_trip("POINT ZM EMPTY");
+ }
+
+ // LineString tests
+ #[test]
+ fn test_write_linestring_xy() {
+ test_wkb_round_trip("LINESTRING (0 0, 1 1)");
+ test_wkb_round_trip("LINESTRING (0 0, 1 1, 2 2)");
+ test_wkb_round_trip("LINESTRING (0 0, 1 1, 2 2, 3 3)");
+ }
+
+ #[test]
+ fn test_write_linestring_xyz() {
+ test_wkb_round_trip("LINESTRING Z (0 0 0, 1 1 1)");
+ test_wkb_round_trip("LINESTRING Z (0 0 0, 1 1 1, 2 2 2)");
+ test_wkb_round_trip("LINESTRING Z (0 0 10, 1 1 11, 2 2 12)");
+ }
+
+ #[test]
+ fn test_write_linestring_xyzm() {
+ test_wkb_round_trip("LINESTRING ZM (0 0 1 2, 1 1 3 4)");
+ test_wkb_round_trip("LINESTRING ZM (0 0 1 2, 1 1 3 4, 2 2 5 6)");
+ test_wkb_round_trip("LINESTRING ZM (0 0 10 20, 1 1 11 21, 2 2 12 22)");
+ }
+
+ #[test]
+ fn test_write_linestring_empty() {
+ test_wkb_round_trip("LINESTRING EMPTY");
+ test_wkb_round_trip("LINESTRING Z EMPTY");
+ test_wkb_round_trip("LINESTRING ZM EMPTY");
+ }
+
+ // Polygon tests
+ #[test]
+ fn test_write_polygon_xy() {
+ test_wkb_round_trip("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))");
+ test_wkb_round_trip("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2
2, 2 1, 1 1))");
+ }
+
+ #[test]
+ fn test_write_polygon_xyz() {
+ test_wkb_round_trip("POLYGON Z ((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0
10))");
+ test_wkb_round_trip("POLYGON Z ((0 0 0, 1 0 0, 0 1 0, 0 0 0))");
+ test_wkb_round_trip(
+ "POLYGON Z ((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0 10), (1 1 5, 1 2
5, 2 2 5, 2 1 5, 1 1 5))",
+ );
+ }
+
+ #[test]
+ fn test_write_polygon_xyzm() {
+ test_wkb_round_trip("POLYGON ZM ((0 0 10 1, 4 0 10 2, 4 4 10 3, 0 4 10
4, 0 0 10 5))");
+ test_wkb_round_trip(
+ "POLYGON ZM ((0 0 10 1, 4 0 10 2, 4 4 10 3, 0 4 10 4, 0 0 10 5),
(1 1 5 10, 1 2 5 11, 2 2 5 12, 2 1 5 13, 1 1 5 10))",
+ );
+ }
+
+ #[test]
+ fn test_write_polygon_empty() {
+ test_wkb_round_trip("POLYGON EMPTY");
+ test_wkb_round_trip("POLYGON Z EMPTY");
+ test_wkb_round_trip("POLYGON ZM EMPTY");
+ }
+
+ // MultiPoint tests
+ #[test]
+ fn test_write_multipoint_xy() {
+ test_wkb_round_trip("MULTIPOINT ((0 0), (1 1))");
+ test_wkb_round_trip("MULTIPOINT ((0 0), (1 1), (2 2))");
+ }
+
+ #[test]
+ fn test_write_multipoint_xyz() {
+ test_wkb_round_trip("MULTIPOINT Z ((0 0 0), (1 1 1))");
+ test_wkb_round_trip("MULTIPOINT Z ((0 0 0), (1 1 1), (2 2 2))");
+ }
+
+ #[test]
+ fn test_write_multipoint_xyzm() {
+ test_wkb_round_trip("MULTIPOINT ZM ((0 0 1 2), (1 1 3 4))");
+ test_wkb_round_trip("MULTIPOINT ZM ((0 0 1 2), (1 1 3 4), (2 2 5 6))");
+ }
+
+ #[test]
+ fn test_write_multipoint_empty() {
+ test_wkb_round_trip("MULTIPOINT EMPTY");
+ test_wkb_round_trip("MULTIPOINT Z EMPTY");
+ test_wkb_round_trip("MULTIPOINT ZM EMPTY");
+ }
+
+ // MultiLineString tests
+ #[test]
+ fn test_write_multilinestring_xy() {
+ test_wkb_round_trip("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))");
+ test_wkb_round_trip("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3), (4 4, 5
5))");
+ }
+
+ #[test]
+ fn test_write_multilinestring_xyz() {
+ test_wkb_round_trip("MULTILINESTRING Z ((0 0 0, 1 1 1), (2 2 2, 3 3
3))");
+ test_wkb_round_trip("MULTILINESTRING Z ((0 0 0, 1 1 1), (2 2 2, 3 3
3), (4 4 4, 5 5 5))");
+ }
+
+ #[test]
+ fn test_write_multilinestring_xyzm() {
+ test_wkb_round_trip("MULTILINESTRING ZM ((0 0 1 2, 1 1 3 4), (2 2 5 6,
3 3 7 8))");
+ }
+
+ #[test]
+ fn test_write_multilinestring_empty() {
+ test_wkb_round_trip("MULTILINESTRING EMPTY");
+ test_wkb_round_trip("MULTILINESTRING Z EMPTY");
+ test_wkb_round_trip("MULTILINESTRING ZM EMPTY");
+ }
+
+ // MultiPolygon tests
+ #[test]
+ fn test_write_multipolygon_xy() {
+ test_wkb_round_trip(
+ "MULTIPOLYGON (((0 0, 4 0, 4 4, 0 4, 0 0)), ((5 5, 6 5, 6 6, 5 6,
5 5)))",
+ );
+ }
+
+ #[test]
+ fn test_write_multipolygon_xyz() {
+ test_wkb_round_trip(
+ "MULTIPOLYGON Z (((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0 10)), ((5 5
20, 6 5 20, 6 6 20, 5 6 20, 5 5 20)))",
+ );
+ }
+
+ #[test]
+ fn test_write_multipolygon_xyzm() {
+ test_wkb_round_trip(
+ "MULTIPOLYGON ZM (((0 0 10 1, 4 0 10 2, 4 4 10 3, 0 4 10 4, 0 0 10
5)), ((5 5 20 10, 6 5 20 11, 6 6 20 12, 5 6 20 13, 5 5 20 10)))",
+ );
+ }
+
+ #[test]
+ fn test_write_multipolygon_empty() {
+ test_wkb_round_trip("MULTIPOLYGON EMPTY");
+ test_wkb_round_trip("MULTIPOLYGON Z EMPTY");
+ test_wkb_round_trip("MULTIPOLYGON ZM EMPTY");
+ }
+
+ // GeometryCollection tests
+ #[test]
+ fn test_write_geometrycollection_xy() {
+ test_wkb_round_trip("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (0 0,
1 1))");
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (0 0, 1 1), POLYGON
((0 0, 1 0, 0 1, 0 0)))",
+ );
+ }
+
+ #[test]
+ fn test_write_geometrycollection_xyz() {
+ test_wkb_round_trip("GEOMETRYCOLLECTION Z (POINT Z (1 2 3), LINESTRING
Z (0 0 0, 1 1 1))");
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION Z (POINT Z (1 2 3), LINESTRING Z (0 0 0, 1 1
1), POLYGON Z ((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0 10)))",
+ );
+ }
+
+ #[test]
+ fn test_write_geometrycollection_xyzm() {
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4), LINESTRING ZM (0 0 1
2, 1 1 3 4))",
+ );
+ }
+
+ #[test]
+ fn test_write_geometrycollection_mixed_dimensions() {
+ // Test that dimension is inferred from nested geometries when not
specified on collection
+ test_wkb_round_trip("GEOMETRYCOLLECTION (POINT Z (1 2 3), LINESTRING Z
(0 0 0, 1 1 1))");
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION (POINT ZM (1 2 3 4), LINESTRING ZM (0 0 1 2, 1
1 3 4))",
+ );
+ }
+
+ #[test]
+ fn test_write_geometrycollection_empty() {
+ test_wkb_round_trip("GEOMETRYCOLLECTION EMPTY");
+ test_wkb_round_trip("GEOMETRYCOLLECTION Z EMPTY");
+ test_wkb_round_trip("GEOMETRYCOLLECTION ZM EMPTY");
+ }
+
+ // Collections nested inside collections exercise the recursive path.
+ #[test]
+ fn test_write_geometrycollection_nested() {
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 2), POINT (3
4)), POINT (5 6))",
+ );
+ test_wkb_round_trip(
+ "GEOMETRYCOLLECTION Z (GEOMETRYCOLLECTION Z (POINT Z (1 2 3),
POINT Z (4 5 6)), POINT Z (7 8 9))",
+ );
+ }
+}
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 963fefe9..e471372b 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -18,6 +18,7 @@ mod binary_predicates;
mod distance;
mod executor;
mod geos;
+mod geos_to_wkb;
mod overlay;
pub mod register;
mod st_area;
diff --git a/c/sedona-geos/src/st_buffer.rs b/c/sedona-geos/src/st_buffer.rs
index e152a405..15300a0e 100644
--- a/c/sedona-geos/src/st_buffer.rs
+++ b/c/sedona-geos/src/st_buffer.rs
@@ -31,6 +31,7 @@ use sedona_schema::{
};
use crate::executor::GeosExecutor;
+use crate::geos_to_wkb::write_geos_geometry;
/// ST_Buffer() implementation using the geos crate
///
@@ -149,11 +150,7 @@ fn invoke_scalar(
.buffer_with_params(distance, params)
.map_err(|e| DataFusionError::External(Box::new(e)))?;
- let wkb = geometry
- .to_wkb()
- .map_err(|e| DataFusionError::Execution(format!("Failed to convert to
wkb: {e}")))?;
-
- writer.write_all(wkb.as_ref())?;
+ write_geos_geometry(&geometry, writer)?;
Ok(())
}
diff --git a/c/sedona-geos/src/st_unaryunion.rs
b/c/sedona-geos/src/st_unaryunion.rs
index 57467783..f57338d8 100644
--- a/c/sedona-geos/src/st_unaryunion.rs
+++ b/c/sedona-geos/src/st_unaryunion.rs
@@ -27,6 +27,7 @@ use sedona_schema::datatypes::SedonaType;
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
use crate::executor::GeosExecutor;
+use crate::geos_to_wkb::write_geos_geometry;
/// ST_UnaryUnion() implementation using the geos crate
pub fn st_unary_union_impl() -> ScalarKernelRef {
@@ -55,8 +56,8 @@ impl SedonaScalarKernel for STUnaryUnion {
executor.execute_wkb_void(|maybe_wkb| {
match maybe_wkb {
Some(wkb) => {
- let result_wkb = invoke_scalar(&wkb)?;
- builder.append_value(&result_wkb);
+ invoke_scalar(&wkb, &mut builder)?;
+ builder.append_value([]);
}
_ => builder.append_null(),
}
@@ -68,16 +69,14 @@ impl SedonaScalarKernel for STUnaryUnion {
}
}
-fn invoke_scalar(geos_geom: &geos::Geometry) -> Result<Vec<u8>> {
+fn invoke_scalar(geos_geom: &geos::Geometry, writer: &mut impl std::io::Write)
-> Result<()> {
let geometry = geos_geom
.unary_union()
.map_err(|e| DataFusionError::Execution(format!("Failed to perform
unary union: {e}")))?;
- let wkb = geometry
- .to_wkb()
- .map_err(|e| DataFusionError::Execution(format!("Failed to convert to
wkb: {e}")))?;
+ write_geos_geometry(&geometry, writer)?;
- Ok(wkb)
+ Ok(())
}
#[cfg(test)]
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index dd102437..d50606ea 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -892,6 +892,65 @@ def test_st_unaryunion(eng, geom, expected):
)
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+ ("geom", "expected"),
+ [
+ # Skip M tests because geos rust isn't capable of writing XYM
geometries yet
+ # https://github.com/apache/sedona-db/issues/481
+ ("POINT Z EMPTY", "POINT Z EMPTY"),
+ ("POINT ZM EMPTY", "POINT ZM EMPTY"),
+ ("POINT Z (0 0 0)", "POINT Z(0 0 0)"),
+ ("POINT ZM (1 2 3 4)", "POINT ZM(1 2 3 4)"),
+ ("LINESTRING Z (0 0 0, 1 1 1)", "LINESTRING Z(0 0 0,1 1 1)"),
+ ("LINESTRING ZM (0 0 1 2, 1 1 3 4)", "LINESTRING ZM(0 0 1 2,1 1 3 4)"),
+ (
+ "POLYGON Z ((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0 10))",
+ "POLYGON Z((0 0 10,4 0 10,4 4 10,0 4 10,0 0 10))",
+ ),
+ (
+ "POLYGON ZM ((0 0 10 1, 4 0 10 2, 4 4 10 3, 0 4 10 4, 0 0 10 5))",
+ "POLYGON ZM((0 0 10 1,4 0 10 2,4 4 10 3,0 4 10 4,0 0 10 5))",
+ ),
+ ("MULTIPOINT Z ((0 0 0), (1 1 1))", "MULTIPOINT Z((0 0 0),(1 1 1))"),
+ ("MULTIPOINT ZM ((0 0 1 2), (1 1 3 4))", "MULTIPOINT ZM((0 0 1 2),(1 1
3 4))"),
+ # Polygons overlap, so it's reduced to a single one
+ (
+ "MULTIPOLYGON Z (((0 0 10, 4 0 10, 4 4 10, 0 4 10, 0 0 10)), ((1 1
5, 1 2 5, 2 2 5, 2 1 5, 1 1 5)))",
+ "POLYGON Z((0 4 10,4 4 10,4 0 10,0 0 10,0 4 10))",
+ ),
+ ("GEOMETRYCOLLECTION Z EMPTY", "GEOMETRYCOLLECTION Z EMPTY"),
+ ("GEOMETRYCOLLECTION ZM EMPTY", "GEOMETRYCOLLECTION ZM EMPTY"),
+ (
+ "GEOMETRYCOLLECTION Z(POINT Z(1 2 3), LINESTRING Z(0 0 0,1 1 1))",
+ "GEOMETRYCOLLECTION Z(POINT Z(1 2 3),LINESTRING Z(0 0 0,1 1 1))",
+ ),
+ # dimension specified on nested geometries, but not outer
geometrycollection
+ (
+ "GEOMETRYCOLLECTION (POINT Z(1 2 3), LINESTRING Z(0 0 0,1 1 1))",
+ "GEOMETRYCOLLECTION Z(POINT Z(1 2 3),LINESTRING Z(0 0 0,1 1 1))",
+ ),
+ # Skipping GeometryCollection ZM tests because geos unary_union()
doesn't seem to work properly for them yet.
+ ],
+)
+def test_st_unaryunion_zm(eng, geom, expected):
+ is_postgis = eng == PostGIS
+ eng = eng.create_or_skip()
+ if "EMPTY" in expected.upper():
+ eng.assert_query_result(
+ f"SELECT ST_IsEmpty(ST_UnaryUnion({geom_or_null(geom)}))", True
+ )
+ elif is_postgis and ("M(" in expected or "M (" in expected):
+ pytest.skip("PostGIS doesn't support M dimensions")
+ else:
+ # Test for exact string equality
+ # Remove all spaces from both the actual and expected results to
ignore formatting differences
+ eng.assert_query_result(
+ f"SELECT replace(ST_AsText(ST_UnaryUnion({geom_or_null(geom)})), '
', '')",
+ expected.replace(" ", ""),
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
def test_st_makeline(eng):
eng = eng.create_or_skip()