This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new ace5542a feat(rust/sedona-functions): Implement ST_AsEWKB with item
CRS support (#535)
ace5542a is described below
commit ace5542add43a79b7eeceaf175287a9a8bcd5304
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Jan 23 11:56:26 2026 -0600
feat(rust/sedona-functions): Implement ST_AsEWKB with item CRS support
(#535)
Co-authored-by: Copilot <[email protected]>
---
python/sedonadb/python/sedonadb/testing.py | 8 +-
python/sedonadb/tests/functions/test_wkb.py | 93 +++++++
rust/sedona-common/src/error.rs | 17 +-
rust/sedona-functions/src/executor.rs | 14 ++
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_asewkb.rs | 368 ++++++++++++++++++++++++++++
rust/sedona-geometry/src/ewkb_factory.rs | 353 ++++++++++++++++++++++++++
rust/sedona-geometry/src/lib.rs | 1 +
rust/sedona-schema/src/crs.rs | 11 +-
10 files changed, 854 insertions(+), 13 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/testing.py
b/python/sedonadb/python/sedonadb/testing.py
index ec8b870a..eb271060 100644
--- a/python/sedonadb/python/sedonadb/testing.py
+++ b/python/sedonadb/python/sedonadb/testing.py
@@ -635,11 +635,15 @@ class PostGISSingleThread(PostGIS):
cur.execute("SET max_parallel_workers_per_gather TO 0")
-def geom_or_null(arg):
+def geom_or_null(arg, srid=None):
"""Format SQL expression for a geometry object or NULL"""
if arg is None:
return "NULL"
- return f"ST_GeomFromText('{arg}')"
+
+ if srid is None:
+ return f"ST_GeomFromText('{arg}')"
+ else:
+ return f"ST_GeomFromEWKT('SRID={srid};{arg}')"
def geog_or_null(arg):
diff --git a/python/sedonadb/tests/functions/test_wkb.py
b/python/sedonadb/tests/functions/test_wkb.py
new file mode 100644
index 00000000..424d9a36
--- /dev/null
+++ b/python/sedonadb/tests/functions/test_wkb.py
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import shapely
+from sedonadb.testing import PostGIS, SedonaDB, geom_or_null
+
+
[email protected]("eng", [SedonaDB, PostGIS])
[email protected]("srid", [None, 4326])
[email protected](
+ "geom",
+ [
+ # XY dimensions
+ "POINT (1 2)",
+ "LINESTRING (1 2, 3 4, 5 6)",
+ "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))",
+ "MULTIPOINT ((1 2), (3 4))",
+ "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))",
+ "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))",
+ "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))",
+ # XYZ dimensions
+ "POINT Z (1 2 3)",
+ "LINESTRING Z (1 2 3, 4 5 6)",
+ "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
+ "MULTIPOINT Z ((1 2 3), (4 5 6))",
+ "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
+ "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
+ "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))",
+ # XYM dimensions
+ "POINT M (1 2 3)",
+ "LINESTRING M (1 2 3, 4 5 6)",
+ "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
+ "MULTIPOINT M ((1 2 3), (4 5 6))",
+ "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
+ "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
+ "GEOMETRYCOLLECTION M (POINT M (1 2 3))",
+ # XYZM dimensions
+ "POINT ZM (1 2 3 4)",
+ "LINESTRING ZM (1 2 3 4, 5 6 7 8)",
+ "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))",
+ "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))",
+ "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))",
+ "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))",
+ "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))",
+ # Empty geometries
+ "POINT EMPTY",
+ "LINESTRING EMPTY",
+ "POLYGON EMPTY",
+ "MULTIPOINT EMPTY",
+ "MULTILINESTRING EMPTY",
+ "MULTIPOLYGON EMPTY",
+ "GEOMETRYCOLLECTION EMPTY",
+ # NULL
+ None,
+ ],
+)
+def test_st_asewkb(eng, srid, geom):
+ eng = eng.create_or_skip()
+
+ if geom is not None:
+ shapely_geom = shapely.from_wkt(geom)
+ if srid is not None:
+ shapely_geom = shapely.set_srid(shapely_geom, srid)
+ write_srid = True
+ else:
+ write_srid = False
+
+ expected = shapely.to_wkb(
+ shapely_geom,
+ output_dimension=4,
+ byte_order=1,
+ flavor="extended",
+ include_srid=write_srid,
+ )
+ else:
+ expected = None
+
+ eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom, srid)})",
expected)
diff --git a/rust/sedona-common/src/error.rs b/rust/sedona-common/src/error.rs
index 101db2c4..5b7c04b0 100644
--- a/rust/sedona-common/src/error.rs
+++ b/rust/sedona-common/src/error.rs
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-/// Macro to create Sedona Internal Error that avoids the misleading error
message from
-/// DataFusionError::Internal.
+/// Macro to create Sedona Internal Error from places such as `map_err()` that
+/// require a DataFusionError instead of an Err
#[macro_export]
-macro_rules! sedona_internal_err {
+macro_rules! sedona_internal_datafusion_err {
($($args:expr),*) => {{
let msg = std::format!(
"SedonaDB internal error: {}{}.\nThis issue was likely caused by a
bug in SedonaDB's code. \
@@ -28,7 +28,16 @@ macro_rules! sedona_internal_err {
datafusion_common::DataFusionError::get_back_trace(),
);
// We avoid using Internal to avoid the message suggesting it's
internal to DataFusion
- Err(datafusion_common::DataFusionError::External(msg.into()))
+ datafusion_common::DataFusionError::External(msg.into())
+ }};
+}
+
+/// Macro to create Sedona Internal Error that avoids the misleading error
message from
+/// DataFusionError::Internal.
+#[macro_export]
+macro_rules! sedona_internal_err {
+ ($($args:expr),*) => {{ // accepts a comma-separated list of expressions
+ Err($crate::sedona_internal_datafusion_err!($($args),*)) // forwards the arguments unchanged and wraps the resulting error value in `Err`
}};
}
diff --git a/rust/sedona-functions/src/executor.rs
b/rust/sedona-functions/src/executor.rs
index f1d679d2..8809ac0e 100644
--- a/rust/sedona-functions/src/executor.rs
+++ b/rust/sedona-functions/src/executor.rs
@@ -383,6 +383,20 @@ impl ScalarGeo for ScalarValue {
| ScalarValue::BinaryView(maybe_item)
| ScalarValue::LargeBinary(maybe_item) =>
Ok(maybe_item.as_deref()),
ScalarValue::Null => Ok(None),
+ ScalarValue::Struct(s)
+ if s.fields().len() == 2
+ && s.fields()[0].name() == "item"
+ && s.fields()[1].name() == "crs" =>
+ {
+ let item_type =
SedonaType::from_storage_field(&s.fields()[0])?;
+ let mut out = None;
+ s.column(0).iter_as_wkb_bytes(&item_type, 1, |v| {
+ out = v;
+ Ok(())
+ })?;
+
+ Ok(out)
+ }
_ => sedona_internal_err!("Can't iterate over {:?} ScalarValue as
&[u8]", self),
}
}
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 6e38e426..6e8f884b 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -28,6 +28,7 @@ mod st_affine_helpers;
pub mod st_analyze_agg;
mod st_area;
mod st_asbinary;
+mod st_asewkb;
mod st_asgeojson;
mod st_astext;
mod st_azimuth;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index 8161923a..14405409 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -66,6 +66,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_affine::st_affine_udf,
crate::st_area::st_area_udf,
crate::st_asbinary::st_asbinary_udf,
+ crate::st_asewkb::st_asewkb_udf,
crate::st_asgeojson::st_asgeojson_udf,
crate::st_astext::st_astext_udf,
crate::st_azimuth::st_azimuth_udf,
diff --git a/rust/sedona-functions/src/st_asewkb.rs
b/rust/sedona-functions/src/st_asewkb.rs
new file mode 100644
index 00000000..76cab162
--- /dev/null
+++ b/rust/sedona-functions/src/st_asewkb.rs
@@ -0,0 +1,368 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::{sync::Arc, vec};
+
+use arrow_array::builder::BinaryBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{
+ cast::{as_string_view_array, as_struct_array},
+ error::Result,
+ exec_datafusion_err, exec_err, ScalarValue,
+};
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{ewkb_factory::write_ewkb_geometry,
wkb_factory::WKB_MIN_PROBABLE_BYTES};
+use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType,
matchers::ArgMatcher};
+
+use crate::executor::WkbExecutor;
+
+/// ST_AsEWKB() scalar UDF implementation
+///
+/// An implementation of EWKB writing using Sedona's geometry EWKB/WKB
facilities.
+pub fn st_asewkb_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_asewkb", // name the function is registered under
+ vec![Arc::new(STAsEWKBItemCrs {}), Arc::new(STAsEWKB {})], // two kernels: one matching item_crs input, one matching geometry/geography input
+ Volatility::Immutable,
+ Some(st_asewkb_doc()), // user-facing documentation built by st_asewkb_doc()
+ )
+}
+
+fn st_asewkb_doc() -> Documentation { // Builds the Documentation attached to the st_asewkb UDF.
+ Documentation::builder(
+ DOC_SECTION_OTHER, // documentation section the function is listed under
+ r#"Return the Extended Well-Known Binary (EWKB) representation of a
geometry or geography.
+
+Compared to ST_AsBinary(), this function embeds an integer SRID derived from
the type or derived
+from the item-level CRS for item CRS types. This is particularly useful for
integration with
+PostGIS"#,
+ "ST_AsEWKB (A: Geometry)", // syntax summary shown alongside the description
+ )
+ .with_argument("geom", "geometry: Input geometry or geography") // the single documented argument
+ .with_sql_example("SELECT ST_AsEWKB(ST_Point(1.0, 2.0, 4326))")
+ .build()
+}
+
+#[derive(Debug)]
+struct STAsEWKB {} // Kernel for inputs whose SRID comes from the CRS stored in the argument type.
+
+impl SedonaScalarKernel for STAsEWKB {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry_or_geography()], // exactly one argument, matched by is_geometry_or_geography()
+ SedonaType::Arrow(DataType::Binary), // result type: Arrow Binary
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = WkbExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), // initial byte capacity scaled by the iteration count
+ );
+
+ let maybe_srid = match &arg_types[0] { // resolved once from the argument type and reused for every row
+ SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => match crs
{
+ Some(crs) => match crs.srid()? {
+ Some(0) => None, // SRID 0 is treated the same as having no SRID
+ Some(srid) => Some(srid),
+ _ => return exec_err!("CRS {crs} cannot be represented by
a single SRID"), // srid() returned None: fail the whole batch
+ },
+ None => None, // type carries no CRS, so no SRID is written
+ },
+ SedonaType::Arrow(DataType::Null) => None, // Null-typed input never carries an SRID
+ _ => return sedona_internal_err!("Unexpected input to invoke_batch
in ST_AsEWKB"),
+ };
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb) => {
+ write_ewkb_geometry(&mut builder, &wkb, maybe_srid) // the builder itself is passed as the writer
+ .map_err(|e| exec_datafusion_err!("EWKB writer error
{e}"))?;
+ builder.append_value([]); // NOTE(review): presumably closes the bytes written above as one value — confirm against arrow's builder Write impl
+ }
+ None => builder.append_null(), // null geometry in, null out
+ }
+
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+#[derive(Debug)]
+struct STAsEWKBItemCrs {} // Kernel for item_crs inputs, where the SRID is resolved per row from the CRS column.
+
+impl SedonaScalarKernel for STAsEWKBItemCrs {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_item_crs()], // exactly one argument, matched by is_item_crs()
+ SedonaType::Arrow(DataType::Binary), // result type: Arrow Binary
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = WkbExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), // initial byte capacity scaled by the iteration count
+ );
+
+ let crs_array_ref = match &args[0] { // pull the CRS column out of the struct, for array and scalar input alike
+ ColumnarValue::Array(array) => {
+ let struct_array = as_struct_array(array)?;
+ struct_array.column(1).clone() // column 1 is read as the CRS strings below
+ }
+ ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => {
+ struct_array.column(1).clone()
+ }
+ _ => return sedona_internal_err!("Unexpected item_crs type"),
+ };
+
+ let crs_array = as_string_view_array(&crs_array_ref)?; // fails unless the CRS column is a string view array
+ let mut srid_iter = crs_array // lazy iterator: each CRS string is parsed only when next() is called
+ .into_iter()
+ .map(|maybe_crs_str| match maybe_crs_str {
+ None => Ok(None), // null CRS: no SRID
+ Some(crs_str) => {
+ match deserialize_crs(crs_str)
+ .map_err(|e| exec_datafusion_err!("{}", e.message()))? // re-raised as an execution error carrying only the original message
+ {
+ None => Ok(None), // deserialize_crs yielded no CRS: no SRID
+ Some(crs) => match crs.srid()? {
+ Some(0) => Ok(None), // SRID 0 is treated the same as having no SRID
+ Some(srid) => Ok(Some(srid)),
+ _ => {
+ exec_err!("CRS {crs} cannot be represented by
a single SRID")
+ }
+ },
+ }
+ }
+ });
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ let maybe_srid = srid_iter.next().unwrap()?; // resolved before the null check, so a bad CRS errors even for a null geometry; NOTE(review): unwrap assumes one CRS entry per executor iteration — confirm
+ match maybe_wkb {
+ Some(wkb) => {
+ write_ewkb_geometry(&mut builder, &wkb, maybe_srid) // the builder itself is passed as the writer
+ .map_err(|e| exec_datafusion_err!("EWKB writer error
{e}"))?;
+ builder.append_value([]); // NOTE(review): presumably closes the bytes written above as one value — confirm against arrow's builder Write impl
+ }
+ None => builder.append_null(), // null geometry in, null out
+ }
+
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use arrow_array::{ArrayRef, BinaryArray};
+ use datafusion_common::scalar::ScalarValue;
+ use datafusion_expr::ScalarUDF;
+ use rstest::rstest;
+ use sedona_schema::{
+ crs::lnglat,
+ datatypes::{
+ Edges, WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY,
WKB_GEOMETRY_ITEM_CRS,
+ WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY,
+ },
+ };
+ use sedona_testing::{
+ create::{create_array_item_crs, create_scalar_item_crs},
+ testers::ScalarUdfTester,
+ };
+
+ use super::*;
+
+ const POINT12: [u8; 21] = [
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xf0, 0x3f, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
+ ];
+
+ const POINT12_LNGLAT: [u8; 25] = [
+ 0x01, 0x01, 0x00, 0x00, 0x20, 0xe6, 0x10, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
+ 0xf0, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
+ ];
+
+ #[test]
+ fn udf_metadata() {
+ let udf: ScalarUDF = st_asewkb_udf().into();
+ assert_eq!(udf.name(), "st_asewkb");
+ assert!(udf.documentation().is_some())
+ }
+
+ #[rstest]
+ fn udf_no_srid(
+ #[values(
+ WKB_GEOMETRY,
+ WKB_GEOGRAPHY,
+ WKB_VIEW_GEOMETRY,
+ WKB_VIEW_GEOGRAPHY,
+ WKB_GEOMETRY_ITEM_CRS.clone(),
+ WKB_GEOGRAPHY_ITEM_CRS.clone(),
+ )]
+ sedona_type: SedonaType,
+ ) {
+ let udf = st_asewkb_udf();
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+
+ assert_eq!(
+ tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(),
+ ScalarValue::Binary(Some(POINT12.to_vec()))
+ );
+
+ assert_eq!(
+ tester.invoke_wkb_scalar(None).unwrap(),
+ ScalarValue::Binary(None)
+ );
+
+ let expected_array: BinaryArray = [Some(POINT12), None,
Some(POINT12)].iter().collect();
+ assert_eq!(
+ &tester
+ .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT
(1 2)")])
+ .unwrap(),
+ &(Arc::new(expected_array) as ArrayRef)
+ );
+ }
+
+ #[rstest]
+ fn udf_srid_from_type(
+ #[values(
+ SedonaType::Wkb(Edges::Planar, lnglat()),
+ SedonaType::Wkb(Edges::Spherical, lnglat())
+ )]
+ sedona_type: SedonaType,
+ ) {
+ let udf = st_asewkb_udf();
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+
+ assert_eq!(
+ tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(),
+ ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec()))
+ );
+
+ assert_eq!(
+ tester.invoke_wkb_scalar(None).unwrap(),
+ ScalarValue::Binary(None)
+ );
+
+ let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None,
Some(POINT12_LNGLAT)]
+ .iter()
+ .collect();
+ assert_eq!(
+ &tester
+ .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT
(1 2)")])
+ .unwrap(),
+ &(Arc::new(expected_array) as ArrayRef)
+ );
+ }
+
+ #[test]
+ fn udf_srid_from_item_crs() {
+ let udf = st_asewkb_udf();
+ let tester = ScalarUdfTester::new(udf.into(),
vec![WKB_GEOMETRY_ITEM_CRS.clone()]);
+
+ let scalar_with_srid =
+ create_scalar_item_crs(Some("POINT (1 2)"), Some("EPSG:4326"),
&WKB_GEOMETRY);
+ assert_eq!(
+ tester.invoke_scalar(scalar_with_srid).unwrap(),
+ ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec()))
+ );
+
+ let array_with_srid = create_array_item_crs(
+ &[Some("POINT (1 2)"), None, Some("POINT (1 2)")],
+ [Some("EPSG:4326"), None, Some("EPSG:4326")],
+ &WKB_GEOMETRY,
+ );
+ let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None,
Some(POINT12_LNGLAT)]
+ .iter()
+ .collect();
+ assert_eq!(
+ &tester.invoke_array(array_with_srid).unwrap(),
+ &(Arc::new(expected_array) as ArrayRef)
+ );
+ }
+
+ #[test]
+ fn udf_invalid_type_crs() {
+ let udf = st_asewkb_udf();
+
+ let crs_where_srid_returns_none =
deserialize_crs("EPSG:9999999999").unwrap();
+ let sedona_type = SedonaType::Wkb(Edges::Planar,
crs_where_srid_returns_none);
+
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+ let err = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap_err();
+ assert_eq!(
+ err.message(),
+ "CRS epsg:9999999999 cannot be represented by a single SRID"
+ );
+ }
+
+ #[test]
+ fn udf_invalid_item_crs() {
+ let udf = st_asewkb_udf();
+ let tester = ScalarUdfTester::new(udf.into(),
vec![WKB_GEOMETRY_ITEM_CRS.clone()]);
+
+ // Very large SRID
+ let scalar_with_srid_outside_u32 = create_scalar_item_crs(
+ Some("POINT (1 2)"),
+ Some("EPSG:999999999999"),
+ &WKB_GEOMETRY,
+ );
+ let err = tester
+ .invoke_scalar(scalar_with_srid_outside_u32)
+ .unwrap_err();
+ assert_eq!(
+ err.message(),
+ "CRS epsg:999999999999 cannot be represented by a single SRID"
+ );
+
+ // CRS that fails to parse in deserialize_crs()
+ let scalar_with_unparsable_crs = create_scalar_item_crs(
+ Some("POINT (1 2)"),
+ Some("This is invalid JSON and also not auth:code"),
+ &WKB_GEOMETRY,
+ );
+ let err = tester
+ .invoke_scalar(scalar_with_unparsable_crs)
+ .unwrap_err();
+ assert_eq!(
+ err.message(),
+ "Error deserializing PROJJSON Crs: expected value at line 1 column
1"
+ );
+ }
+}
diff --git a/rust/sedona-geometry/src/ewkb_factory.rs
b/rust/sedona-geometry/src/ewkb_factory.rs
new file mode 100644
index 00000000..9a54fa73
--- /dev/null
+++ b/rust/sedona-geometry/src/ewkb_factory.rs
@@ -0,0 +1,353 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::io::Write;
+
+use geo_traits::{
+ GeometryCollectionTrait, GeometryTrait, LineStringTrait,
MultiLineStringTrait, MultiPointTrait,
+ MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use wkb::reader::Wkb;
+
+use crate::{error::SedonaGeometryError, wkb_factory::write_wkb_coord_trait};
+
+const EWKB_Z_BIT: u32 = 0x80000000; // OR'd into the type word for Xyz and Xyzm geometries
+const EWKB_M_BIT: u32 = 0x40000000; // OR'd into the type word for Xym and Xyzm geometries
+const EWKB_SRID_BIT: u32 = 0x20000000; // OR'd into the type word when an SRID follows it
+
+pub fn write_ewkb_geometry( // Write `geom` to `buf` as EWKB by dispatching to the writer for its geometry type.
+ buf: &mut impl Write, // destination for the encoded bytes
+ geom: &Wkb, // parsed WKB geometry to re-encode
+ srid: Option<u32>, // SRID handed to the selected writer; `None` means no SRID is written
+) -> Result<(), SedonaGeometryError> {
+ match geom.as_type() {
+ geo_traits::GeometryType::Point(p) => write_ewkb_point(buf, p, srid),
+ geo_traits::GeometryType::LineString(ls) =>
write_ewkb_line_string(buf, ls, srid),
+ geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(buf,
poly, srid),
+ geo_traits::GeometryType::MultiPoint(mp) =>
write_ewkb_multi_point(buf, mp, srid),
+ geo_traits::GeometryType::MultiLineString(mls) => {
+ write_ewkb_multi_line_string(buf, mls, srid)
+ }
+ geo_traits::GeometryType::MultiPolygon(mpoly) =>
write_ewkb_multi_polygon(buf, mpoly, srid),
+ geo_traits::GeometryType::GeometryCollection(gc) => {
+ write_ewkb_geometry_collection(buf, gc, srid)
+ }
+ _ => Err(SedonaGeometryError::Invalid( // any other geometry type is rejected rather than skipped
+ "Unsupported EWKB geometry type".to_string(),
+ )),
+ }
+}
+
+fn write_ewkb_point( // Write a point: header followed by its coordinate values.
+ buf: &mut impl Write,
+ geom: &wkb::reader::Point,
+ srid: Option<u32>, // written into this point's header when `Some`
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?; // base type code 1 for points
+ match geom.byte_order() {
+ wkb::Endianness::BigEndian => match geom.coord() { // big-endian source cannot be copied as-is
+ Some(c) => write_wkb_coord_trait(buf, &c)?, // NOTE(review): presumably emits little-endian values — confirm in wkb_factory
+ None => { // no coordinate available (presumably the empty-point case — confirm)
+ for _ in 0..geom.dim().size() {
+ buf.write_all(&f64::NAN.to_le_bytes())?; // one little-endian NaN per dimension
+ }
+ }
+ },
+ wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, // little-endian source: coordinate bytes copied unchanged
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_line_string( // Write a linestring: header, coordinate count, then the coordinates.
+ buf: &mut impl Write,
+ geom: &wkb::reader::LineString,
+ srid: Option<u32>, // written into this linestring's header when `Some`
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?; // base type code 2 for linestrings
+ let num_coords = geom.num_coords() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_coords.to_le_bytes())?; // count is written little-endian
+ match geom.byte_order() {
+ wkb::Endianness::BigEndian => { // big-endian source: coordinates are re-written one by one
+ for c in geom.coords() {
+ write_wkb_coord_trait(buf, &c)?; // NOTE(review): presumably emits little-endian values — confirm in wkb_factory
+ }
+ }
+ wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, // little-endian source: coordinate bytes copied unchanged
+ }
+
+ Ok(())
+}
+
+fn write_linearring( // Write one polygon ring: coordinate count then coordinates; no type header or SRID is written.
+ buf: &mut impl Write,
+ geom: &wkb::reader::LinearRing,
+) -> Result<(), SedonaGeometryError> {
+ let num_coords = geom.num_coords() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_coords.to_le_bytes())?; // count is written little-endian
+ match geom.byte_order() {
+ wkb::Endianness::BigEndian => { // big-endian source: coordinates are re-written one by one
+ for c in geom.coords() {
+ write_wkb_coord_trait(buf, &c)?; // NOTE(review): presumably emits little-endian values — confirm in wkb_factory
+ }
+ }
+ wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, // little-endian source: coordinate bytes copied unchanged
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_polygon( // Write a polygon: header, ring count, then each ring.
+ buf: &mut impl Write,
+ geom: &wkb::reader::Polygon,
+ srid: Option<u32>, // written into this polygon's header when `Some`
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?; // base type code 3 for polygons
+ let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as
u32; // interior rings plus one when an exterior ring exists
+ buf.write_all(&num_rings.to_le_bytes())?; // count is written little-endian
+
+ if let Some(exterior) = geom.exterior() { // exterior ring, when present, goes first
+ write_linearring(buf, exterior)?;
+ }
+
+ for interior in geom.interiors() { // followed by every interior ring in iteration order
+ write_linearring(buf, interior)?;
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_multi_point( // Write a multipoint: header, child count, then each point as its own geometry.
+ buf: &mut impl Write,
+ geom: &wkb::reader::MultiPoint,
+ srid: Option<u32>, // written into the multipoint's own header only
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?; // base type code 4 for multipoints
+ let num_children = geom.num_points() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_children.to_le_bytes())?; // count is written little-endian
+
+ for child in geom.points() {
+ write_ewkb_point(buf, &child, None)?; // children get a full header but never an SRID
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_multi_line_string( // Write a multilinestring: header, child count, then each linestring as its own geometry.
+ buf: &mut impl Write,
+ geom: &wkb::reader::MultiLineString,
+ srid: Option<u32>, // written into the multilinestring's own header only
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?; // base type code 5 for multilinestrings
+ let num_children = geom.num_line_strings() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_children.to_le_bytes())?; // count is written little-endian
+
+ for child in geom.line_strings() {
+ write_ewkb_line_string(buf, child, None)?; // children get a full header but never an SRID
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_multi_polygon( // Write a multipolygon: header, child count, then each polygon as its own geometry.
+ buf: &mut impl Write,
+ geom: &wkb::reader::MultiPolygon,
+ srid: Option<u32>, // written into the multipolygon's own header only
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?; // base type code 6 for multipolygons
+ let num_children = geom.num_polygons() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_children.to_le_bytes())?; // count is written little-endian
+
+ for child in geom.polygons() {
+ write_ewkb_polygon(buf, child, None)?; // children get a full header but never an SRID
+ }
+
+ Ok(())
+}
+
+fn write_ewkb_geometry_collection( // Write a geometry collection: header, child count, then each child geometry.
+ buf: &mut impl Write,
+ geom: &wkb::reader::GeometryCollection,
+ srid: Option<u32>, // written into the collection's own header only
+) -> Result<(), SedonaGeometryError> {
+ write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?; // base type code 7 for geometry collections
+ let num_children = geom.num_geometries() as u32; // `as` truncates silently if the count exceeds u32::MAX
+ buf.write_all(&num_children.to_le_bytes())?; // count is written little-endian
+
+ for child in geom.geometries() {
+ write_ewkb_geometry(buf, child, None)?; // recurses through the dispatcher, so nested collections work; children never get an SRID
+ }
+
+ Ok(())
+}
+
+fn write_geometry_type_and_srid( // Write the EWKB header: byte-order byte, flagged type word, then the SRID if one is given.
+ mut base_type: u32, // geometry type code; flag bits are OR'd into it below
+ dimensions: wkb::reader::Dimension, // selects which of the Z/M flags are set
+ srid: Option<u32>, // `Some` sets the SRID flag and appends the value after the type word
+ buf: &mut impl Write,
+) -> Result<(), SedonaGeometryError> {
+ buf.write_all(&[0x01])?; // leading byte is always 0x01; every multi-byte value below is written little-endian
+
+ match dimensions {
+ wkb::reader::Dimension::Xy => {} // no dimension flags
+ wkb::reader::Dimension::Xyz => base_type |= EWKB_Z_BIT,
+ wkb::reader::Dimension::Xym => base_type |= EWKB_M_BIT,
+ wkb::reader::Dimension::Xyzm => { // both flags set
+ base_type |= EWKB_Z_BIT;
+ base_type |= EWKB_M_BIT;
+ }
+ }
+
+ if let Some(srid) = srid {
+ base_type |= EWKB_SRID_BIT; // flag must be set before the type word is written
+ buf.write_all(&base_type.to_le_bytes())?;
+ buf.write_all(&srid.to_le_bytes())?; // SRID follows the type word as 4 little-endian bytes
+ } else {
+ buf.write_all(&base_type.to_le_bytes())?; // no SRID: type word only
+ }
+
+ Ok(())
+}
+
+#[cfg(test)]
+mod test {
+
+ use rstest::rstest;
+ use wkb::{writer::WriteOptions, Endianness};
+
+ use super::*;
+
+ #[rstest]
+ fn test_roundtrip(
+ #[values(Endianness::LittleEndian, Endianness::BigEndian)] endianness:
Endianness,
+ ) {
+ for wkt_str in ROUNDTRIP_CASES {
+ let wkt: wkt::Wkt<f64> = wkt_str.parse().unwrap();
+
+ let mut iso_wkb = Vec::new();
+ wkb::writer::write_geometry(&mut iso_wkb, &wkt, &WriteOptions {
endianness }).unwrap();
+ let wkb_geom = wkb::reader::read_wkb(&iso_wkb).unwrap();
+
+ let mut ewkb_no_srid = Vec::new();
+ write_ewkb_geometry(&mut ewkb_no_srid, &wkb_geom, None).unwrap();
+
+ let mut ewkb_with_srid = Vec::new();
+ write_ewkb_geometry(&mut ewkb_with_srid, &wkb_geom,
Some(4326)).unwrap();
+
+ // Check that the ewkbs have the correct number of bytes
+ assert_eq!(
+ ewkb_no_srid.len(),
+ iso_wkb.len(),
+ "incorrect number of bytes for case {wkt_str} without srid"
+ );
+ assert_eq!(
+ ewkb_with_srid.len(),
+ ewkb_no_srid.len() + size_of::<u32>(),
+ "incorrect number of bytes for case {wkt_str} with srid"
+ );
+
+ // Check the rendered WKT of the no srid EWKB
+ let wkb_geom_roundtrip_no_srid =
wkb::reader::read_wkb(&ewkb_no_srid).unwrap();
+ let mut wkt_roundtrip_no_srid = String::new();
+ wkt::to_wkt::write_geometry(&mut wkt_roundtrip_no_srid,
&wkb_geom_roundtrip_no_srid)
+ .unwrap();
+ assert_eq!(wkt_roundtrip_no_srid, wkt_str);
+
+ // Check the rendered WKT of the srid EWKB
+ let wkb_geom_roundtrip_with_srid =
wkb::reader::read_wkb(&ewkb_with_srid).unwrap();
+ let mut wkt_roundtrip_with_srid = String::new();
+ wkt::to_wkt::write_geometry(
+ &mut wkt_roundtrip_with_srid,
+ &wkb_geom_roundtrip_with_srid,
+ )
+ .unwrap();
+ assert_eq!(wkt_roundtrip_with_srid, wkt_str);
+ }
+ }
+
+ const ROUNDTRIP_CASES: [&str; 60] = [
+ // XY dimensions
+ "POINT(1 2)",
+ "LINESTRING(1 2,3 4,5 6)",
+ "POLYGON((0 1,2 0,2 3,0 3,0 1))",
+ "MULTIPOINT((1 2),(3 4))",
+ "MULTILINESTRING((1 2,3 4),(5 6,7 8))",
+ "MULTIPOLYGON(((0 1,2 0,2 3,0 3,0 1)))",
+ "GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))",
+ "GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(POINT(1 2)))",
+ // XYZ dimensions
+ "POINT Z(1 2 3)",
+ "LINESTRING Z(1 2 3,4 5 6)",
+ "POLYGON Z((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))",
+ "MULTIPOINT Z((1 2 3),(4 5 6))",
+ "MULTILINESTRING Z((1 2 3,4 5 6),(7 8 9,10 11 12))",
+ "MULTIPOLYGON Z(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))",
+ "GEOMETRYCOLLECTION Z(POINT Z(1 2 3))",
+ "GEOMETRYCOLLECTION Z(GEOMETRYCOLLECTION Z(POINT Z(1 2 3)))",
+ // XYM dimensions
+ "POINT M(1 2 3)",
+ "LINESTRING M(1 2 3,4 5 6)",
+ "POLYGON M((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))",
+ "MULTIPOINT M((1 2 3),(4 5 6))",
+ "MULTILINESTRING M((1 2 3,4 5 6),(7 8 9,10 11 12))",
+ "MULTIPOLYGON M(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))",
+ "GEOMETRYCOLLECTION M(POINT M(1 2 3))",
+ "GEOMETRYCOLLECTION M(GEOMETRYCOLLECTION M(POINT M(1 2 3)))",
+ // XYZM dimensions
+ "POINT ZM(1 2 3 4)",
+ "LINESTRING ZM(1 2 3 4,5 6 7 8)",
+ "POLYGON ZM((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3))",
+ "MULTIPOINT ZM((1 2 3 4),(5 6 7 8))",
+ "MULTILINESTRING ZM((1 2 3 4,5 6 7 8),(9 10 11 12,13 14 15 16))",
+ "MULTIPOLYGON ZM(((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3)))",
+ "GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4))",
+ "GEOMETRYCOLLECTION ZM(GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4)))",
+ // Empty geometries
+ "POINT EMPTY",
+ "LINESTRING EMPTY",
+ "POLYGON EMPTY",
+ "MULTIPOINT EMPTY",
+ "MULTILINESTRING EMPTY",
+ "MULTIPOLYGON EMPTY",
+ "GEOMETRYCOLLECTION EMPTY",
+ // Empty geometries Z
+ "POINT Z EMPTY",
+ "LINESTRING Z EMPTY",
+ "POLYGON Z EMPTY",
+ "MULTIPOINT Z EMPTY",
+ "MULTILINESTRING Z EMPTY",
+ "MULTIPOLYGON Z EMPTY",
+ "GEOMETRYCOLLECTION Z EMPTY",
+ // Empty geometries M
+ "POINT M EMPTY",
+ "LINESTRING M EMPTY",
+ "POLYGON M EMPTY",
+ "MULTIPOINT M EMPTY",
+ "MULTILINESTRING M EMPTY",
+ "MULTIPOLYGON M EMPTY",
+ "GEOMETRYCOLLECTION M EMPTY",
+ // Empty geometries ZM
+ "POINT ZM EMPTY",
+ "LINESTRING ZM EMPTY",
+ "POLYGON ZM EMPTY",
+ "MULTIPOINT ZM EMPTY",
+ "MULTILINESTRING ZM EMPTY",
+ "MULTIPOLYGON ZM EMPTY",
+ "GEOMETRYCOLLECTION ZM EMPTY",
+ ];
+}
diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs
index f189ec7b..5de9f65e 100644
--- a/rust/sedona-geometry/src/lib.rs
+++ b/rust/sedona-geometry/src/lib.rs
@@ -18,6 +18,7 @@ pub mod analyze;
pub mod bounding_box;
pub mod bounds;
pub mod error;
+pub mod ewkb_factory;
pub mod interval;
pub mod is_empty;
pub mod point_count;
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index a4649b15..3813df11 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use datafusion_common::{DataFusionError, Result};
+use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError,
Result};
use lru::LruCache;
use std::cell::RefCell;
use std::fmt::{Debug, Display};
@@ -318,9 +318,8 @@ impl FromStr for ProjJSON {
type Err = DataFusionError;
fn from_str(s: &str) -> Result<Self> {
- let value: Value = serde_json::from_str(s).map_err(|err| {
- DataFusionError::Internal(format!("Error deserializing PROJJSON
Crs: {err}"))
- })?;
+ let value: Value = serde_json::from_str(s)
+ .map_err(|err| plan_datafusion_err!("Error deserializing PROJJSON
Crs: {err}"))?;
Self::try_new(value)
}
@@ -329,9 +328,7 @@ impl FromStr for ProjJSON {
impl ProjJSON {
pub fn try_new(value: Value) -> Result<Self> {
if !value.is_object() {
- return Err(DataFusionError::Internal(format!(
- "Can't create PROJJSON from non-object: {value}"
- )));
+ return plan_err!("Can't create PROJJSON from non-object: {value}");
}
Ok(Self { value })