This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 4523cd4 perf(rust/sedona-geometry,rust/sedona-functions): Optimize
st_has(z/m) using WKBBytesExecutor + Implement new WKBHeader (#171)
4523cd4 is described below
commit 4523cd4021e46338143604ffd2c24264b1fcce0a
Author: Peter Nguyen <[email protected]>
AuthorDate: Wed Oct 29 11:14:13 2025 -0700
perf(rust/sedona-geometry,rust/sedona-functions): Optimize st_has(z/m)
using WKBBytesExecutor + Implement new WKBHeader (#171)
Co-authored-by: Dewey Dunnington <[email protected]>
---
Cargo.lock | 1 +
benchmarks/test_functions.py | 36 +
python/sedonadb/tests/functions/test_functions.py | 5 +
rust/sedona-functions/src/st_haszm.rs | 100 +-
rust/sedona-geometry/Cargo.toml | 1 +
rust/sedona-geometry/src/lib.rs | 1 +
rust/sedona-geometry/src/wkb_header.rs | 1012 +++++++++++++++++++++
rust/sedona-testing/src/fixtures.rs | 186 ++++
8 files changed, 1313 insertions(+), 29 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 4537b8c..291859e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5024,6 +5024,7 @@ dependencies = [
"geo-types",
"lru",
"rstest",
+ "sedona-testing",
"serde",
"serde_json",
"serde_with",
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index cf0efd6..4c30cd4 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -167,6 +167,42 @@ class TestBenchFunctions(TestBenchBase):
benchmark(queries)
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_hasm(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_HasM(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_hasz(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_HasZ(geom1) from {table}")
+
+ benchmark(queries)
+
@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index a219e4f..addc774 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -719,8 +719,13 @@ def test_st_geomfromwkb(eng, geom):
("LINESTRING Z (0 0 0, 1 1 1)", True),
("POLYGON EMPTY", False),
("MULTIPOINT ((0 0), (1 1))", False),
+ ("MULTIPOINT Z ((0 0 0))", True),
+ ("MULTIPOINT ZM ((0 0 0 0))", True),
("GEOMETRYCOLLECTION EMPTY", False),
+ # Z-dim specified only in the nested geometry
("GEOMETRYCOLLECTION (POINT Z (0 0 0))", True),
+ # Z-dim specified on both levels
+ ("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", True),
("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", True),
],
)
diff --git a/rust/sedona-functions/src/st_haszm.rs
b/rust/sedona-functions/src/st_haszm.rs
index a4a20ff..2832013 100644
--- a/rust/sedona-functions/src/st_haszm.rs
+++ b/rust/sedona-functions/src/st_haszm.rs
@@ -16,19 +16,17 @@
// under the License.
use std::sync::Arc;
-use crate::executor::WkbExecutor;
+use crate::executor::WkbBytesExecutor;
use arrow_array::builder::BooleanBuilder;
use arrow_schema::DataType;
-use datafusion_common::error::Result;
+use datafusion_common::{error::Result, DataFusionError};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
};
-use geo_traits::GeometryCollectionTrait;
-use geo_traits::{Dimensions, GeometryTrait};
-use sedona_common::sedona_internal_err;
+use geo_traits::Dimensions;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::wkb_header::WkbHeader;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
-use wkb::reader::Wkb;
pub fn st_hasz_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new(
@@ -91,13 +89,13 @@ impl SedonaScalarKernel for STHasZm {
_ => unreachable!(),
};
- let executor = WkbExecutor::new(arg_types, args);
+ let executor = WkbBytesExecutor::new(arg_types, args);
let mut builder =
BooleanBuilder::with_capacity(executor.num_iterations());
executor.execute_wkb_void(|maybe_item| {
match maybe_item {
Some(item) => {
- builder.append_option(invoke_scalar(&item, dim_index)?);
+ builder.append_option(invoke_scalar(item, dim_index)?);
}
None => builder.append_null(),
}
@@ -108,27 +106,34 @@ impl SedonaScalarKernel for STHasZm {
}
}
-fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result<Option<bool>> {
- match item.as_type() {
- geo_traits::GeometryType::GeometryCollection(collection) => {
- if collection.num_geometries() == 0 {
- Ok(Some(false))
- } else {
- // PostGIS doesn't allow creating a GeometryCollection with
geometries of different dimensions
- // so we can just check the dimension of the first one
- let first_geom = unsafe { collection.geometry_unchecked(0) };
- invoke_scalar(first_geom, dim_index)
- }
- }
- _ => {
- let geom_dim = item.dim();
- match dim_index {
- 2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz |
Dimensions::Xyzm))),
- 3 => Ok(Some(matches!(geom_dim, Dimensions::Xym |
Dimensions::Xyzm))),
- _ => sedona_internal_err!("unexpected dim_index"),
- }
- }
+fn invoke_scalar(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
+ let header = WkbHeader::try_new(buf).map_err(|e|
DataFusionError::External(Box::new(e)))?;
+ let top_level_dimensions = header
+ .dimensions()
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+ // Infer dimension based on first coordinate dimension for cases where it
differs from top-level
+ // e.g. GEOMETRYCOLLECTION (POINT Z (1 2 3))
+ let dimensions;
+ if let Some(first_geom_dimensions) = header.first_geom_dimensions() {
+ dimensions = first_geom_dimensions;
+ } else {
+ dimensions = top_level_dimensions;
+ }
+
+ if dim_index == 2 {
+ return Ok(Some(matches!(
+ dimensions,
+ Dimensions::Xyz | Dimensions::Xyzm
+ )));
+ }
+ if dim_index == 3 {
+ return Ok(Some(matches!(
+ dimensions,
+ Dimensions::Xym | Dimensions::Xyzm
+ )));
}
+ Ok(Some(false))
}
#[cfg(test)]
@@ -137,7 +142,9 @@ mod tests {
use datafusion_expr::ScalarUDF;
use rstest::rstest;
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
- use sedona_testing::testers::ScalarUdfTester;
+ use sedona_testing::{
+ fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB,
testers::ScalarUdfTester,
+ };
use super::*;
@@ -184,11 +191,19 @@ mod tests {
let result = m_tester.invoke_wkb_scalar(None).unwrap();
m_tester.assert_scalar_result_equals(result, ScalarValue::Null);
+ // Z-dimension specified only in the nested geometry, but not the geom
collection level
let result = z_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))"))
.unwrap();
z_tester.assert_scalar_result_equals(result,
ScalarValue::Boolean(Some(true)));
+ // Z-dimension specified on both the geom collection and nested
geometry level
+ // Geometry collection with Z dimension both on the geom collection
and nested geometry level
+ let result = z_tester
+ .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z (POINT Z (1 2 3))"))
+ .unwrap();
+ z_tester.assert_scalar_result_equals(result,
ScalarValue::Boolean(Some(true)));
+
let result = m_tester
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT M (1 2 3))"))
.unwrap();
@@ -203,5 +218,32 @@ mod tests {
.invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY"))
.unwrap();
m_tester.assert_scalar_result_equals(result,
ScalarValue::Boolean(Some(false)));
+
+ // Empty geometry collections with Z or M dimensions
+ let result = z_tester
+ .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z EMPTY"))
+ .unwrap();
+ z_tester.assert_scalar_result_equals(result,
ScalarValue::Boolean(Some(true)));
+
+ let result = m_tester
+ .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION M EMPTY"))
+ .unwrap();
+ m_tester.assert_scalar_result_equals(result,
ScalarValue::Boolean(Some(true)));
+ }
+
+ #[test]
+ fn multipoint_with_inferred_z_dimension() {
+ let z_tester = ScalarUdfTester::new(st_hasz_udf().into(),
vec![WKB_GEOMETRY]);
+ let m_tester = ScalarUdfTester::new(st_hasm_udf().into(),
vec![WKB_GEOMETRY]);
+
+ let scalar =
ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));
+ assert_eq!(
+ z_tester.invoke_scalar(scalar.clone()).unwrap(),
+ ScalarValue::Boolean(Some(true))
+ );
+ assert_eq!(
+ m_tester.invoke_scalar(scalar.clone()).unwrap(),
+ ScalarValue::Boolean(Some(false))
+ );
}
}
diff --git a/rust/sedona-geometry/Cargo.toml b/rust/sedona-geometry/Cargo.toml
index 8f12758..7d3a45c 100644
--- a/rust/sedona-geometry/Cargo.toml
+++ b/rust/sedona-geometry/Cargo.toml
@@ -30,6 +30,7 @@ result_large_err = "allow"
[dev-dependencies]
geo-types = { workspace = true }
rstest = { workspace = true }
+sedona-testing = { path = "../sedona-testing" }
serde_json = { workspace = true }
wkt = { workspace = true }
diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs
index 65cc593..f189ec7 100644
--- a/rust/sedona-geometry/src/lib.rs
+++ b/rust/sedona-geometry/src/lib.rs
@@ -24,3 +24,4 @@ pub mod point_count;
pub mod transform;
pub mod types;
pub mod wkb_factory;
+pub mod wkb_header;
diff --git a/rust/sedona-geometry/src/wkb_header.rs
b/rust/sedona-geometry/src/wkb_header.rs
new file mode 100644
index 0000000..22f49a3
--- /dev/null
+++ b/rust/sedona-geometry/src/wkb_header.rs
@@ -0,0 +1,1012 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use geo_traits::Dimensions;
+
+use crate::error::SedonaGeometryError;
+use crate::types::GeometryTypeId;
+
+const Z_FLAG_BIT: u32 = 0x80000000;
+const M_FLAG_BIT: u32 = 0x40000000;
+const SRID_FLAG_BIT: u32 = 0x20000000;
+
+/// Fast-path WKB header parser
+/// Parses the header fields eagerly in `try_new` and stores them for cheap repeated access
+#[derive(Debug)]
+pub struct WkbHeader {
+ geometry_type: u32,
+ // Not applicable for a point
+ // number of points for a linestring
+ // number of rings for a polygon
+ // number of geometries for a MULTIPOINT, MULTILINESTRING, MULTIPOLYGON,
or GEOMETRYCOLLECTION
+ size: u32,
+ // SRID if given buffer was EWKB. Otherwise, 0.
+ srid: u32,
+ // First x, y coordinates of the first nested geometry, or (f64::NAN, f64::NAN) if
empty
+ first_xy: (f64, f64),
+ // Dimensions of the first nested geometry of a collection or None if empty
+ // For POINT, LINESTRING, POLYGON, returns the dimensions of the geometry
+ first_geom_dimensions: Option<Dimensions>,
+}
+
+impl WkbHeader {
+ /// Creates a new [WkbHeader] from a buffer
+ pub fn try_new(buf: &[u8]) -> Result<Self, SedonaGeometryError> {
+ let mut wkb_buffer = WkbBuffer::new(buf);
+
+ wkb_buffer.read_endian()?;
+
+ let geometry_type = wkb_buffer.read_u32()?;
+
+ let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type &
0x7)?;
+
+ let mut srid = 0;
+ // if EWKB
+ if geometry_type & SRID_FLAG_BIT != 0 {
+ srid = wkb_buffer.read_u32()?;
+ }
+
+ let size = if geometry_type_id == GeometryTypeId::Point {
+ // Dummy value for a point
+ 1
+ } else {
+ wkb_buffer.read_u32()?
+ };
+
+ // Default values for empty geometries
+ let first_x;
+ let first_y;
+ let first_geom_dimensions: Option<Dimensions>;
+
+ wkb_buffer.set_offset(0);
+
+ let first_geom_idx = wkb_buffer.first_geom_idx()?;
+ if let Some(i) = first_geom_idx {
+ // Reset to first_geom_idx and parse the dimensions
+ wkb_buffer.set_offset(i);
+ // Parse dimension
+ wkb_buffer.read_endian()?;
+ let code = wkb_buffer.read_u32()?;
+ first_geom_dimensions = Some(calc_dimensions(code)?);
+
+ // For first_xy_coord, we need to pass the buffer starting from
the geometry header
+ wkb_buffer.set_offset(i);
+ (first_x, first_y) = wkb_buffer.first_xy_coord()?;
+ } else {
+ first_geom_dimensions = None;
+ first_x = f64::NAN;
+ first_y = f64::NAN;
+ }
+
+ Ok(Self {
+ geometry_type,
+ srid,
+ size,
+ first_xy: (first_x, first_y),
+ first_geom_dimensions,
+ })
+ }
+
+ /// Returns the [GeometryTypeId] of the WKB by only parsing the header
instead of the entire WKB
+ pub fn geometry_type_id(&self) -> Result<GeometryTypeId,
SedonaGeometryError> {
+ // Only the low 3 bits encode the base type; the high bits carry additional
info
+ let code = self.geometry_type & 0x7;
+
+ let geometry_type_id = GeometryTypeId::try_from_wkb_id(code)?;
+
+ Ok(geometry_type_id)
+ }
+
+ /// Returns the size of the geometry
+ ///
+ /// - 1 for Points
+ /// - Number of points for a linestring
+ /// - Number of rings for a polygon
+ /// - Number of geometries for a MULTIPOINT, MULTILINESTRING,
MULTIPOLYGON, or GEOMETRYCOLLECTION
+ pub fn size(&self) -> u32 {
+ self.size
+ }
+
+ /// Returns the SRID if given buffer was EWKB. Otherwise, 0.
+ pub fn srid(&self) -> u32 {
+ self.srid
+ }
+
+ /// Returns the first x, y coordinates of the geometry, or (f64::NAN,
f64::NAN) if empty
+ pub fn first_xy(&self) -> (f64, f64) {
+ self.first_xy
+ }
+
+ /// Returns the top-level dimension of the WKB
+ pub fn dimensions(&self) -> Result<Dimensions, SedonaGeometryError> {
+ calc_dimensions(self.geometry_type)
+ }
+
+ /// Returns the dimensions of the first nested non-collection geometry, or `None` when no such geometry is found
+ pub fn first_geom_dimensions(&self) -> Option<Dimensions> {
+ self.first_geom_dimensions
+ }
+}
+
+// A helper struct for calculating the WkbHeader
+struct WkbBuffer<'a> {
+ buf: &'a [u8],
+ offset: usize,
+ remaining: usize,
+ last_endian: u8,
+}
+
+impl<'a> WkbBuffer<'a> {
+ fn new(buf: &'a [u8]) -> Self {
+ Self {
+ buf,
+ offset: 0,
+ remaining: buf.len(),
+ last_endian: 0,
+ }
+ }
+
+ // For MULTIPOINT, MULTILINESTRING, MULTIPOLYGON, or GEOMETRYCOLLECTION,
returns the index to the first nested
+ // non-collection geometry (POINT, LINESTRING, or POLYGON), or None if
empty
+ // For POINT, LINESTRING, POLYGON, returns 0 as it already is a
non-collection geometry
+ fn first_geom_idx(&mut self) -> Result<Option<usize>, SedonaGeometryError>
{
+ // Record the start of this geometry header so we can return an
absolute index
+ let start_offset = self.offset;
+
+ self.read_endian()?;
+ let geometry_type = self.read_u32()?;
+ let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type &
0x7)?;
+
+ match geometry_type_id {
+ GeometryTypeId::Point | GeometryTypeId::LineString |
GeometryTypeId::Polygon => {
+ // Return absolute offset to the start of this geometry header
+ Ok(Some(start_offset))
+ }
+ GeometryTypeId::MultiPoint
+ | GeometryTypeId::MultiLineString
+ | GeometryTypeId::MultiPolygon
+ | GeometryTypeId::GeometryCollection => {
+ if geometry_type & SRID_FLAG_BIT != 0 {
+ // Skip the SRID
+ self.read_u32()?;
+ }
+
+ let num_geometries = self.read_u32()?;
+
+ if num_geometries == 0 {
+ return Ok(None);
+ }
+
+ // Recursive call to get first non-collection geometry
+ self.first_geom_idx()
+ }
+ _ => Err(SedonaGeometryError::Invalid(format!(
+ "Unexpected geometry type: {geometry_type_id:?}"
+ ))),
+ }
+ }
+
+ // Given a point, linestring, or polygon, return the first xy coordinate
+ // If the geometry is empty, (NaN, NaN) is returned
+ fn first_xy_coord(&mut self) -> Result<(f64, f64), SedonaGeometryError> {
+ self.read_endian()?;
+ let geometry_type = self.read_u32()?;
+
+ let geometry_type_id = GeometryTypeId::try_from_wkb_id(geometry_type &
0x7)?;
+
+ // Skip the SRID if it's present
+ if geometry_type & SRID_FLAG_BIT != 0 {
+ self.read_u32()?;
+ }
+
+ match geometry_type_id {
+ GeometryTypeId::LineString => {
+ let size = self.read_u32()?;
+ if size == 0 {
+ return Ok((f64::NAN, f64::NAN));
+ }
+ }
+ GeometryTypeId::Polygon => {
+ let size = self.read_u32()?;
+ if size == 0 {
+ return Ok((f64::NAN, f64::NAN));
+ }
+ let ring0_num_points = self.read_u32()?;
+ if ring0_num_points == 0 {
+ return Ok((f64::NAN, f64::NAN));
+ }
+ }
+ _ => {}
+ }
+
+ let x = self.read_coord()?;
+ let y = self.read_coord()?;
+ Ok((x, y))
+ }
+
+ fn read_endian(&mut self) -> Result<(), SedonaGeometryError> {
+ if self.remaining < 1 {
+ return Err(SedonaGeometryError::Invalid(format!(
+ "Invalid WKB: buffer too small. At offset: {}. Need 1 byte.",
+ self.offset
+ )));
+ }
+ self.last_endian = self.buf[self.offset];
+ self.remaining -= 1;
+ self.offset += 1;
+ Ok(())
+ }
+
+ fn read_u32(&mut self) -> Result<u32, SedonaGeometryError> {
+ if self.remaining < 4 {
+ return Err(SedonaGeometryError::Invalid(format!(
+ "Invalid WKB: buffer too small. At offset: {}. Need 4 bytes.",
+ self.offset
+ )));
+ }
+
+ let off = self.offset;
+ let num = match self.last_endian {
+ 0 => u32::from_be_bytes([
+ self.buf[off],
+ self.buf[off + 1],
+ self.buf[off + 2],
+ self.buf[off + 3],
+ ]),
+ 1 => u32::from_le_bytes([
+ self.buf[off],
+ self.buf[off + 1],
+ self.buf[off + 2],
+ self.buf[off + 3],
+ ]),
+ other => {
+ return Err(SedonaGeometryError::Invalid(format!(
+ "Unexpected byte order: {other:?}"
+ )))
+ }
+ };
+ self.remaining -= 4;
+ self.offset += 4;
+ Ok(num)
+ }
+
+ // Reads one 8-byte f64 ordinate at the current offset; call twice to get the x and y
coordinates
+ fn read_coord(&mut self) -> Result<f64, SedonaGeometryError> {
+ if self.remaining < 8 {
+ return Err(SedonaGeometryError::Invalid(format!(
+ "Invalid WKB: buffer too small. At offset: {}. Need 8 bytes.",
+ self.offset
+ )));
+ }
+
+ let buf = &self.buf;
+ let off = self.offset;
+ let coord: f64 = match self.last_endian {
+ 0 => f64::from_be_bytes([
+ buf[off],
+ buf[off + 1],
+ buf[off + 2],
+ buf[off + 3],
+ buf[off + 4],
+ buf[off + 5],
+ buf[off + 6],
+ buf[off + 7],
+ ]),
+ 1 => f64::from_le_bytes([
+ buf[off],
+ buf[off + 1],
+ buf[off + 2],
+ buf[off + 3],
+ buf[off + 4],
+ buf[off + 5],
+ buf[off + 6],
+ buf[off + 7],
+ ]),
+ other => {
+ return Err(SedonaGeometryError::Invalid(format!(
+ "Unexpected byte order: {other:?}"
+ )))
+ }
+ };
+ self.remaining -= 8;
+ self.offset += 8;
+
+ Ok(coord)
+ }
+
+ fn set_offset(&mut self, offset: usize) {
+ self.offset = offset;
+ self.remaining = self.buf.len() - offset;
+ }
+}
+
+fn calc_dimensions(code: u32) -> Result<Dimensions, SedonaGeometryError> {
+ // Check for EWKB Z and M flags
+ let hasz = (code & Z_FLAG_BIT) != 0;
+ let hasm = (code & M_FLAG_BIT) != 0;
+
+ match (hasz, hasm) {
+ (false, false) => {}
+ // If either flag is set, this must be EWKB (and not ISO WKB)
+ (true, false) => return Ok(Dimensions::Xyz),
+ (false, true) => return Ok(Dimensions::Xym),
+ (true, true) => return Ok(Dimensions::Xyzm),
+ }
+
+ // if SRID flag is set, then it must be EWKB with no z or m
+ if code & SRID_FLAG_BIT != 0 {
+ return Ok(Dimensions::Xy);
+ }
+
+ // Interpret as ISO WKB
+ match code / 1000 {
+ 0 => Ok(Dimensions::Xy),
+ 1 => Ok(Dimensions::Xyz),
+ 2 => Ok(Dimensions::Xym),
+ 3 => Ok(Dimensions::Xyzm),
+ _ => Err(SedonaGeometryError::Invalid(format!(
+ "Unexpected code: {:?}",
+ code
+ ))),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::str::FromStr;
+ use wkb::writer::{write_geometry, WriteOptions};
+ use wkt::Wkt;
+
+ fn make_wkb(wkt_value: &'static str) -> Vec<u8> {
+ let geom = Wkt::<f64>::from_str(wkt_value).unwrap();
+ let mut buf: Vec<u8> = vec![];
+ write_geometry(&mut buf, &geom, &WriteOptions::default()).unwrap();
+ buf
+ }
+
+ #[test]
+ fn geometry_type_id() {
+ let wkb = make_wkb("POINT (1 2)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+
+ let wkb = make_wkb("LINESTRING (1 2, 3 4)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::LineString
+ );
+
+ let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(),
GeometryTypeId::Polygon);
+
+ let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiPoint
+ );
+
+ let wkb = make_wkb("MULTILINESTRING ((1 2, 3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiLineString
+ );
+
+ let wkb = make_wkb("MULTIPOLYGON (((0 0, 0 1, 1 0, 0 0)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiPolygon
+ );
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+
+ // Some cases with z and m dimensions
+ let wkb = make_wkb("POINT Z (1 2 3)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+
+ let wkb = make_wkb("LINESTRING Z (1 2 3, 4 5 6)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::LineString
+ );
+
+ let wkb = make_wkb("POLYGON M ((0 0 0, 0 1 0, 1 0 0, 0 0 0))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(),
GeometryTypeId::Polygon);
+ }
+
+ #[test]
+ fn size() {
+ let wkb = make_wkb("LINESTRING (1 2, 3 4)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 2);
+
+ let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0), (1 1, 1 2, 2 1, 1
1))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 2);
+
+ let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 2);
+
+ let wkb = make_wkb("MULTILINESTRING ((1 2, 3 4, 5 6), (7 8, 9 10, 11
12))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 2);
+
+ let wkb = make_wkb("MULTIPOLYGON (((0 0, 0 1, 1 0, 0 0)), ((1 1, 1 2,
2 1, 1 1)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 2);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 1);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2,
3 4), POLYGON ((0 0, 0 1, 1 0, 0 0)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 3);
+ }
+
+ #[test]
+ fn empty_size() {
+ let wkb = make_wkb("LINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("POLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("MULTIPOINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("MULTILINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("MULTIPOLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.size(), 0);
+ }
+
+ #[test]
+ fn ewkb() {
+ use sedona_testing::fixtures::*;
+
+ // Test POINT with SRID 4326
+ let header = WkbHeader::try_new(&POINT_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ // Test POINT Z with SRID 3857
+ let header = WkbHeader::try_new(&POINT_Z_WITH_SRID_3857_EWKB).unwrap();
+ assert_eq!(header.srid(), 3857);
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz);
+
+ // Test POINT M with SRID 4326
+ let header = WkbHeader::try_new(&POINT_M_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xym);
+
+ // Test POINT ZM with SRID 4326
+ let header =
WkbHeader::try_new(&POINT_ZM_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm);
+
+ // Test GEOMETRYCOLLECTION with SRID 4326
+ let header =
WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xy);
+
+ // Test GEOMETRYCOLLECTION Z with SRID 4326
+ let header =
WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_Z_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ // Outer dimension specified as Xy, but inner dimension is Xyz
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyz);
+
+ // Test GEOMETRYCOLLECTION M with SRID 4326
+ let header =
WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_M_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ // Outer dimension specified as Xy, but inner dimension is Xym
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xym);
+
+ // Test GEOMETRYCOLLECTION ZM with SRID 4326
+ let header =
WkbHeader::try_new(&GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ // Outer dimension specified as Xy, but inner dimension is Xyzm
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
+ }
+
+ #[test]
+ fn srid_linestring() {
+ use sedona_testing::fixtures::*;
+
+ let header =
WkbHeader::try_new(&LINESTRING_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::LineString
+ );
+ assert_eq!(header.size(), 2);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ }
+
+ #[test]
+ fn srid_polygon() {
+ use sedona_testing::fixtures::*;
+
+ let header = WkbHeader::try_new(&POLYGON_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(header.geometry_type_id().unwrap(),
GeometryTypeId::Polygon);
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (0.0, 0.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ }
+
+ #[test]
+ fn multipoint_with_srid() {
+ use sedona_testing::fixtures::*;
+
+ let header =
WkbHeader::try_new(&MULTIPOINT_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiPoint
+ );
+ assert_eq!(header.size(), 2);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ }
+
+ #[test]
+ fn srid_empty_geometries_with_srid() {
+ use sedona_testing::fixtures::*;
+
+ // Test POINT EMPTY with SRID
+ let header =
WkbHeader::try_new(&POINT_EMPTY_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ // Test GEOMETRYCOLLECTION EMPTY with SRID
+ let header =
WkbHeader::try_new(&GEOMETRYCOLLECTION_EMPTY_WITH_SRID_4326_EWKB).unwrap();
+ assert_eq!(header.srid(), 4326);
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 0);
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions(), None);
+ }
+
+ #[test]
+ fn srid_no_srid_flag() {
+ // Test that regular WKB (without SRID flag) returns 0 for SRID
+ let wkb = make_wkb("POINT (1 2)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.srid(), 0);
+ }
+
+ #[test]
+ fn first_xy() {
+ let wkb = make_wkb("POINT (-5 -2)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (-5.0, -2.0));
+
+ let wkb = make_wkb("LINESTRING (1 2, 3 4)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+
+ let wkb = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (0.0, 0.0));
+
+ // Another polygon test since that logic is more complicated
+ let wkb = make_wkb("POLYGON ((1.5 0.5, 1.5 1.5, 1.5 0.5), (0 0, 0 1, 1
0, 0 0))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (1.5, 0.5));
+
+ let wkb = make_wkb("MULTIPOINT ((1 2), (3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+
+ let wkb = make_wkb("MULTILINESTRING ((3 4, 1 2), (5 6, 7 8))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (3.0, 4.0));
+
+ let wkb = make_wkb("MULTIPOLYGON (((-1 -1, 0 1, 1 -1, -1 -1)), ((0 0,
0 1, 1 0, 0 0)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (-1.0, -1.0));
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2,
3 4), POLYGON ((0 0, 0 1, 1 0, 0 0)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ }
+
+ #[test]
+ fn empty_first_xy() {
+ let wkb = make_wkb("POINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ let (x, y) = header.first_xy();
+ assert!(x.is_nan());
+ assert!(y.is_nan());
+
+ let wkb = make_wkb("LINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ let (x, y) = header.first_xy();
+ assert!(x.is_nan());
+ assert!(y.is_nan());
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ let (x, y) = header.first_xy();
+ assert!(x.is_nan());
+ assert!(y.is_nan());
+ }
+
+ #[test]
+ fn empty_geometry_type_id() {
+ let wkb = make_wkb("POINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+
+ let wkb = make_wkb("LINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::LineString
+ );
+
+ let wkb = make_wkb("POLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(),
GeometryTypeId::Polygon);
+
+ let wkb = make_wkb("MULTIPOINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiPoint
+ );
+
+ let wkb = make_wkb("MULTILINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiLineString
+ );
+
+ let wkb = make_wkb("MULTIPOLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::MultiPolygon
+ );
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+
+ // z, m cases
+ let wkb = make_wkb("POINT Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+
+ let wkb = make_wkb("POINT M EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Point);
+
+ let wkb = make_wkb("LINESTRING ZM EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::LineString
+ );
+ }
+
+ #[test]
+ fn dimensions() {
+ let wkb = make_wkb("POINT (1 2)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ let wkb = make_wkb("POINT Z (1 2 3)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz);
+
+ let wkb = make_wkb("POINT M (1 2 3)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xym);
+
+ let wkb = make_wkb("POINT ZM (1 2 3 4)");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm);
+ }
+
+ #[test]
+ fn empty_geometry_dimensions() {
+ // POINTs
+ let wkb = make_wkb("POINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ let wkb = make_wkb("POINT Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz);
+
+ let wkb = make_wkb("POINT M EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xym);
+
+ let wkb = make_wkb("POINT ZM EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm);
+
+ // GEOMETRYCOLLECTIONs
+ let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyz);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION M EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xym);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xyzm);
+ }
+
+ #[test]
+ fn first_geom_dimensions() {
+ // Top-level dimension is xy, while nested geometry is xyz
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT Z (1 2 3))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyz);
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT ZM (1 2 3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT M (1 2 3))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xym);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT ZM (1 2 3 4))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
+ }
+
+ #[test]
+ fn empty_geometry_first_geom_dimensions() {
+ let wkb = make_wkb("POINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xy));
+
+ let wkb = make_wkb("LINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xy));
+
+ let wkb = make_wkb("POLYGON Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), Some(Dimensions::Xyz));
+
+ // Empty collections should return None
+ let wkb = make_wkb("MULTIPOINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), None);
+
+ let wkb = make_wkb("MULTILINESTRING Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), None);
+
+ let wkb = make_wkb("MULTIPOLYGON M EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), None);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(header.first_geom_dimensions(), None);
+ }
+
+ #[test]
+ fn incomplete_buffers() {
+ // Every truncated buffer below must be rejected with an error
+
+ // Empty buffer
+ let result = WkbHeader::try_new(&[]);
+ assert!(result.is_err());
+
+ // Test truncation of a simple POINT (every non-empty strict prefix)
+ let wkb = make_wkb("POINT (1 2)");
+ for i in 1..wkb.len() {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+
+ // Test truncation of a POINT ZM
+ // Check every prefix shorter than what the first xy coordinate needs:
+ // 1 byte_order + 4 geometry type + 8 x + 8 y
+ let last_i = 1 + 4 + 8 + 8;
+ let wkb = make_wkb("POINT ZM (1 2 3 4)");
+ for i in 1..last_i {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+
+ // Test truncation of a GEOMETRYCOLLECTION with nested geometries
+ // Check every prefix shorter than what the first xy coordinate needs:
+ // 1 + 4 + 4 (outer order/type/size) + 1 + 4 (child order/type) + 8 x + 8 y
+ let last_i = 1 + 4 + 4 + 1 + 4 + 8 + 8;
+ let wkb = make_wkb("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (1 2,
3 4))");
+ for i in 1..last_i {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+ }
+
+ #[test]
+ fn incomplete_ewkb_buffers() {
+ use sedona_testing::fixtures::*;
+ // Test incomplete EWKB buffers
+
+ // 1 byte_order + 4 geometry type + 4 srid + 8 x + 8 y
+ let wkb = POINT_WITH_SRID_4326_EWKB;
+ let last_i = 1 + 4 + 4 + 8 + 8;
+ for i in 1..last_i {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+
+ // 1 byte_order + 4 geometry type + 4 srid + 4 size + 1 byte_order + 4
geometry type + 8 x + 8 y
+ let last_i = 1 + 4 + 4 + 4 + 1 + 4 + 8 + 8;
+ let wkb = MULTIPOINT_WITH_SRID_4326_EWKB;
+ for i in 1..last_i {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+
+ // 1 byte_order + 4 geometry type + 4 srid + 4 size + 1 byte_order + 4
geometry type + 8 x + 8 y
+ let last_i = 1 + 4 + 4 + 4 + 1 + 4 + 8 + 8;
+ let wkb = GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB;
+ for i in 1..last_i {
+ assert!(
+ WkbHeader::try_new(&wkb[0..i]).is_err(),
+ "0..{} unexpectedly succeeded",
+ i
+ );
+ }
+ }
+
+ #[test]
+ fn invalid_byte_order() {
+ // Test invalid byte order values
+ let result = WkbHeader::try_new(&[0x02, 0x01, 0x00, 0x00, 0x00]);
+ assert!(result.is_err());
+
+ let result = WkbHeader::try_new(&[0xff, 0x01, 0x00, 0x00, 0x00]);
+ assert!(result.is_err());
+ }
+
+ #[test]
+ fn nested_geometry_collections() {
+ let wkb = make_wkb("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1
2)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xy);
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT ZM
(1 2 3 4)))");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert_eq!(
+ header.geometry_type_id().unwrap(),
+ GeometryTypeId::GeometryCollection
+ );
+ assert_eq!(header.size(), 1);
+ assert_eq!(header.first_xy(), (1.0, 2.0));
+ assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
+ assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
+ }
+}
diff --git a/rust/sedona-testing/src/fixtures.rs
b/rust/sedona-testing/src/fixtures.rs
index 13011bf..b4c1bb1 100644
--- a/rust/sedona-testing/src/fixtures.rs
+++ b/rust/sedona-testing/src/fixtures.rs
@@ -28,6 +28,192 @@ pub const MULTIPOINT_WITH_EMPTY_CHILD_WKB: [u8; 30] = [
0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xf8, 0x7f,
];
+/// A well-known binary blob of MULTIPOINT ((1 2 3)) where outer dimension is
specified for xy
+/// while inner point's dimension is actually xyz
+pub const MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB: [u8; 38] = [
+ 0x01, // byte-order
+ 0x04, 0x00, 0x00, 0x00, // multipoint with xy-dimension specified
+ 0x01, 0x00, 0x00, 0x00, // 1 point
+ // nested point geom
+ 0x01, // byte-order
+ 0xe9, 0x03, 0x00, 0x00, // point with xyz-dimension specified
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate of point
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate of point
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z-coordinate of point
+];
+
+/// EWKB for POINT (1 2) with SRID 4326
+/// Little endian, geometry type 1 (POINT) with SRID flag (0x20000000)
+pub const POINT_WITH_SRID_4326_EWKB: [u8; 25] = [
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x20, // geometry type 1 (POINT) with SRID flag
(0x20000000)
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0
+];
+
+/// EWKB for POINT Z (1 2 3) with SRID 3857
+/// Little endian, geometry type 1 (POINT) with Z and SRID flags (0xa0000000)
+pub const POINT_Z_WITH_SRID_3857_EWKB: [u8; 33] = [
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0xa0, // geometry type 1 (POINT) with Z and SRID flags
+ // (flag bits mark Z on base type 1; the type code 1001 is not used here)
+ 0x11, 0x0f, 0x00, 0x00, // SRID 3857
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z-coordinate 3.0
+];
+
+pub const POINT_M_WITH_SRID_4326_EWKB: [u8; 33] = [
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x60, // geometry type 1 (POINT) with M and SRID flags
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x-coordinate 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y-coordinate 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // m-coordinate 3.0
+];
+
+/// EWKB for POINT ZM (1 2 3 4) with SRID 4326
+pub const POINT_ZM_WITH_SRID_4326_EWKB: [u8; 41] = [
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0xe0, // geometry type 1 (POINT) with Z, M and SRID flags
+ // (flag bits mark Z and M on base type 1; the type code 3001 is not used here)
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0
+];
+
+/// EWKB for LINESTRING (1 2, 3 4) with SRID 4326
+/// Little endian, geometry type 2 (LINESTRING) with SRID flag
+pub const LINESTRING_WITH_SRID_4326_EWKB: [u8; 45] = [
+ 0x01, // byte-order
+ 0x02, 0x00, 0x00, 0x20, // geometry type 2 (LINESTRING) with SRID flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x02, 0x00, 0x00, 0x00, // number of points (2)
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x1 = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y1 = 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // x2 = 3.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // y2 = 4.0
+];
+
+/// EWKB for POLYGON ((0 0, 0 1, 1 0, 0 0)) with SRID 4326
+/// Little endian, geometry type 3 (POLYGON) with SRID flag
+pub const POLYGON_WITH_SRID_4326_EWKB: [u8; 81] = [
+ 0x01, // byte-order
+ 0x03, 0x00, 0x00, 0x20, // geometry type 3 (POLYGON) with SRID flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x01, 0x00, 0x00, 0x00, // number of rings (1)
+ 0x04, 0x00, 0x00, 0x00, // number of points in exterior ring (4)
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x1 = 0.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y1 = 0.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x2 = 0.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // y2 = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x3 = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y3 = 0.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x4 = 0.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y4 = 0.0
+];
+
+/// EWKB for MULTIPOINT ((1 2), (3 4)) with SRID 4326
+/// Little endian, geometry type 4 (MULTIPOINT) with SRID flag
+pub const MULTIPOINT_WITH_SRID_4326_EWKB: [u8; 55] = [
+ 0x01, // byte-order
+ 0x04, 0x00, 0x00, 0x20, // geometry type 4 (MULTIPOINT) with SRID flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x02, 0x00, 0x00, 0x00, // number of points (2)
+ // First point
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x1 = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y1 = 2.0
+ // Second point
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // x2 = 3.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // y2 = 4.0
+];
+
+/// EWKB for GEOMETRYCOLLECTION (POINT (1 2)) with SRID 4326
+/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag
+pub const GEOMETRYCOLLECTION_POINT_WITH_SRID_4326_EWKB: [u8; 34] = [
+ 0x01, // byte-order
+ 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID
flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x01, 0x00, 0x00, 0x00, // number of geometries (1)
+ // Nested POINT
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x00, // geometry type 1 (POINT) - no SRID flag
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
+];
+
+/// EWKB for GEOMETRYCOLLECTION (POINT Z (1 2 3)) with SRID 4326
+/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested
POINT Z (Z flag set)
+pub const GEOMETRYCOLLECTION_POINT_Z_WITH_SRID_4326_EWKB: [u8; 42] = [
+ 0x01, // byte-order
+ 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID
flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x01, 0x00, 0x00, 0x00, // number of geometries (1)
+ // Nested POINT Z
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x80, // geometry type 1 (POINT) with Z flag
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0
+];
+
+/// EWKB for GEOMETRYCOLLECTION (POINT M (1 2 4)) with SRID 4326
+/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested
POINT M (M flag set)
+pub const GEOMETRYCOLLECTION_POINT_M_WITH_SRID_4326_EWKB: [u8; 42] = [
+ 0x01, // byte-order
+ 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID
flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x01, 0x00, 0x00, 0x00, // number of geometries (1)
+ // Nested POINT M
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x40, // geometry type 1 (POINT) with M flag
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0
+];
+
+/// EWKB for GEOMETRYCOLLECTION (POINT ZM (1 2 3 4)) with SRID 4326
+/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag; nested
POINT ZM (Z and M flags set)
+pub const GEOMETRYCOLLECTION_POINT_ZM_WITH_SRID_4326_EWKB: [u8; 50] = [
+ 0x01, // byte-order
+ 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID
flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x01, 0x00, 0x00, 0x00, // number of geometries (1)
+ // Nested POINT ZM
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0xc0, // geometry type 1 (POINT) with Z and M flags
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, // z = 3.0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, // m = 4.0
+];
+
+/// EWKB for POINT EMPTY with SRID 4326
+/// Little endian, geometry type 1 (POINT) with SRID flag
+pub const POINT_EMPTY_WITH_SRID_4326_EWKB: [u8; 25] = [
+ 0x01, // byte-order
+ 0x01, 0x00, 0x00, 0x20, // geometry type 1 (POINT) with SRID flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, // x = NaN
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, // y = NaN
+];
+
+/// EWKB for GEOMETRYCOLLECTION EMPTY with SRID 4326
+/// Little endian, geometry type 7 (GEOMETRYCOLLECTION) with SRID flag
+pub const GEOMETRYCOLLECTION_EMPTY_WITH_SRID_4326_EWKB: [u8; 13] = [
+ 0x01, // byte-order
+ 0x07, 0x00, 0x00, 0x20, // geometry type 7 (GEOMETRYCOLLECTION) with SRID
flag
+ 0xe6, 0x10, 0x00, 0x00, // SRID 4326
+ 0x00, 0x00, 0x00, 0x00, // number of geometries (0)
+];
+
pub fn louisiana<T>() -> LineString<T>
where
T: WktFloat + Default + FromStr,