This is an automated email from the ASF dual-hosted git repository. imbruced pushed a commit to branch add-sedona-serializer in repository https://gitbox.apache.org/repos/asf/sedona-db.git
commit 1bf02f69968dc4e40aea1bd4e64100b6c4fe4692 Author: pawelkocinski <[email protected]> AuthorDate: Wed Dec 17 20:07:07 2025 +0100 add geometry to sedona binary --- rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_to_sedona_spark.rs | 72 +++++++++++++++++++++++++ rust/sedona-serde/src/lib.rs | 1 + rust/sedona-serde/src/serialize.rs | 15 ++++++ 5 files changed, 90 insertions(+) diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 7f9864ae..a600358d 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -68,3 +68,4 @@ pub mod st_union_agg; mod st_xyzm; mod st_xyzm_minmax; mod st_zmflag; +mod st_to_sedona_spark; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index e9bc4a14..2a94da23 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -121,6 +121,7 @@ pub fn default_function_set() -> FunctionSet { crate::st_xyzm::st_z_udf, crate::st_zmflag::st_zmflag_udf, crate::st_from_sedona_spark::st_geomfromsedona_udf, + crate::st_to_sedona_spark::st_geomtosedona_udf, ); register_aggregate_udfs!( diff --git a/rust/sedona-functions/src/st_to_sedona_spark.rs b/rust/sedona-functions/src/st_to_sedona_spark.rs new file mode 100644 index 00000000..6ddf7b45 --- /dev/null +++ b/rust/sedona-functions/src/st_to_sedona_spark.rs @@ -0,0 +1,72 @@ +use std::sync::Arc; +use arrow_schema::DataType; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY}; +use datafusion_expr::{ColumnarValue, Documentation, Volatility}; +use sedona_schema::matchers::ArgMatcher; + +#[derive(Debug)] +struct STGeomToSedonaSpark { + // out_type: SedonaType, +} + +impl SedonaScalarKernel for STGeomToSedonaSpark { + fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::BinaryView), + ); + + matcher.match_args(args) + } + + fn invoke_batch(&self, arg_types: &[SedonaType], args: &[ColumnarValue]) -> datafusion_common::Result<ColumnarValue> { + todo!() + } +} + +pub fn st_geomtosedona_udf() -> SedonaScalarUDF { + let kernel = Arc::new(STGeomToSedonaSpark { + // out_type: WKB_GEOMETRY, + }); + + SedonaScalarUDF::new( + "st_geomtosedonaspark", + vec![kernel], + Volatility::Immutable, + Some(doc()), + ) +} + +fn doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Internal only, it's function used in the vectorized UDFs to translate WKB to Sedona Spark binary format", + "ST_GeomToSedonaSpark (geom: Geometry, crs: string)", + ) + .with_argument("geom", "wkb geometry") + .with_argument("crs", "crs: coordinate reference system") + .with_sql_example("SELECT ST_GeomToSedonaSpark(geom, 'EPSG:4326')") + .build() +} + + +#[cfg(test)] +mod tests { + use arrow_schema::DataType; + use sedona_schema::datatypes::{Edges, SedonaType}; + use sedona_testing::testers::ScalarUdfTester; + use crate::st_from_sedona_spark::st_geomfromsedona_udf; + use crate::st_to_sedona_spark::st_geomtosedona_udf; + + fn get_tester() -> ScalarUdfTester { + ScalarUdfTester::new( + st_geomtosedona_udf().into(), + vec![ + SedonaType::Wkb(Edges::Planar, None), + SedonaType::Arrow(DataType::Utf8), + ], + ) + } +} \ No newline at end of file diff --git a/rust/sedona-serde/src/lib.rs b/rust/sedona-serde/src/lib.rs index 1b96b08c..f9791efa 100644 --- a/rust/sedona-serde/src/lib.rs +++ b/rust/sedona-serde/src/lib.rs @@ -20,3 +20,4 @@ mod linestring; mod point; mod polygon; mod wkb; +mod serialize; diff --git a/rust/sedona-serde/src/serialize.rs b/rust/sedona-serde/src/serialize.rs new file mode 100644 index 00000000..8a1a4bdd --- /dev/null +++ b/rust/sedona-serde/src/serialize.rs @@ -0,0 +1,15 @@ +use arrow_array::builder::BinaryBuilder; +use byteorder::LittleEndian; +use datafusion_common::DataFusionError; +use crate::deserialize::parse_geometry; + +pub fn serialize(builder: &mut BinaryBuilder) -> datafusion_common::Result<()> { + use std::io::Cursor; + + // let mut reader = Cursor::new(bytes); + + // parse_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes) + Err(DataFusionError::NotImplemented( + "Serialization is not yet implemented".to_string(), + )) +} \ No newline at end of file
