This is an automated email from the ASF dual-hosted git repository.

imbruced pushed a commit to branch add-sedona-serializer
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/add-sedona-serializer by this push:
     new c46b6bf3 restructure files
c46b6bf3 is described below

commit c46b6bf3bd402594382867188a376ca02ecc83d9
Author: pawelkocinski <[email protected]>
AuthorDate: Fri Dec 19 01:05:37 2025 +0100

    restructure files
---
 rust/sedona-functions/src/st_to_sedona_spark.rs |  2 +-
 rust/sedona-serde/src/deserialize.rs            | 34 +++++++-----
 rust/sedona-serde/src/linestring.rs             |  5 +-
 rust/sedona-serde/src/point.rs                  |  8 +--
 rust/sedona-serde/src/polygon.rs                |  8 +--
 rust/sedona-serde/src/serialize.rs              | 12 ++---
 rust/sedona/src/context.rs                      | 70 +++++++++++++++++++++++++
 7 files changed, 107 insertions(+), 32 deletions(-)

diff --git a/rust/sedona-functions/src/st_to_sedona_spark.rs b/rust/sedona-functions/src/st_to_sedona_spark.rs
index e5a77e41..e50ec875 100644
--- a/rust/sedona-functions/src/st_to_sedona_spark.rs
+++ b/rust/sedona-functions/src/st_to_sedona_spark.rs
@@ -17,7 +17,7 @@ impl SedonaScalarKernel for STGeomToSedonaSpark {
     fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result<Option<SedonaType>> {
         let matcher = ArgMatcher::new(
             vec![ArgMatcher::is_geometry()],
-            SedonaType::Arrow(DataType::BinaryView),
+            SedonaType::Arrow(DataType::Binary),
         );
 
         matcher.match_args(args)
diff --git a/rust/sedona-serde/src/deserialize.rs b/rust/sedona-serde/src/deserialize.rs
index dd004412..ead99dd5 100644
--- a/rust/sedona-serde/src/deserialize.rs
+++ b/rust/sedona-serde/src/deserialize.rs
@@ -15,9 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::linestring::{parse_linestring, parse_multilinestring};
-use crate::point::{parse_multipoint, parse_point, write_empty_point};
-use crate::polygon::{parse_multipolygon, parse_polygon, write_empty_polygon};
+use crate::linestring::{deserialize_linestring, deserialize_multilinestring};
+use crate::point::{deserialize_multipoint, deserialize_point, deserialize_empty_point};
+use crate::polygon::{deserialize_multipolygon, deserialize_polygon, deserialize_empty_polygon};
 use crate::wkb::write_wkb_byte_order_marker;
 use arrow_array::builder::BinaryBuilder;
 use byteorder::{ByteOrder, LittleEndian, ReadBytesExt, WriteBytesExt};
@@ -36,10 +36,10 @@ pub fn deserialize(builder: &mut BinaryBuilder, bytes: &[u8]) -> datafusion_comm
 
     let mut reader = Cursor::new(bytes);
 
-    parse_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes)
+    deserialize_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes)
 }
 
-pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_geometry<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     bytes: &[u8],
@@ -50,6 +50,12 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
 
     let dimension = get_dimension((preamble_byte) >> 1);
 
+    if dimension != Dimension::XY {
+        return Err(DataFusionError::Execution(
+            "Only 2D geometries (XY) are supported".to_string(),
+        ));
+    }
+
     let _has_srid = (preamble_byte & 0x01) != 0;
 
     cursor.set_position(cursor.position() + 3); // Skip 3 bytes
@@ -58,21 +64,21 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
         1 => {
             let number_of_coordinates = cursor.read_u32::<IN>()?;
             if number_of_coordinates == 0 {
-                write_empty_point::<OUT>(builder, dimension)?;
+                deserialize_empty_point::<OUT>(builder, dimension)?;
                 return Ok(());
             }
 
-            parse_point::<OUT>(builder, cursor, dimension)?;
+            deserialize_point::<OUT>(builder, cursor, dimension)?;
         }
         2 => {
-            parse_linestring::<IN, OUT>(builder, cursor, dimension)?;
+            deserialize_linestring::<IN, OUT>(builder, cursor, dimension)?;
         }
         3 => {
             let mut meta_data_reader = Cursor::new(bytes);
 
             let number_of_points = cursor.read_u32::<IN>()?;
             if number_of_points == 0 {
-                write_empty_polygon::<OUT>(builder, dimension)?;
+                deserialize_empty_polygon::<OUT>(builder, dimension)?;
 
                 return Ok(());
             }
@@ -80,20 +86,20 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
             let metadata_start_position = number_of_points * 8 * 2;
             meta_data_reader.set_position(cursor.position() + (metadata_start_position) as u64);
 
-            parse_polygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+            deserialize_polygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
             cursor.set_position(meta_data_reader.position());
         }
         4 => {
-            parse_multipoint::<IN, OUT>(builder, cursor, dimension)?;
+            deserialize_multipoint::<IN, OUT>(builder, cursor, dimension)?;
         }
         5 => {
             let mut meta_data_reader = Cursor::new(bytes);
-            parse_multilinestring::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+            deserialize_multilinestring::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
             cursor.set_position(meta_data_reader.position());
         }
         6 => {
             let mut meta_data_reader = Cursor::new(bytes);
-            parse_multipolygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+            deserialize_multipolygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
             cursor.set_position(meta_data_reader.position());
         }
         7 => {
@@ -104,7 +110,7 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
             builder.write_u32::<OUT>(number_of_geometries)?;
 
             for _i in 0..number_of_geometries {
-                parse_geometry::<IN, OUT>(builder, cursor, bytes)?;
+                deserialize_geometry::<IN, OUT>(builder, cursor, bytes)?;
             }
         }
         _ => {
diff --git a/rust/sedona-serde/src/linestring.rs b/rust/sedona-serde/src/linestring.rs
index 3fdf6b6e..14f630a0 100644
--- a/rust/sedona-serde/src/linestring.rs
+++ b/rust/sedona-serde/src/linestring.rs
@@ -30,7 +30,7 @@ fn get_linestring_marker(dimension: Dimension) -> u32 {
     }
 }
 
-pub fn parse_linestring<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_linestring<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     dimension: Dimension,
@@ -53,7 +53,7 @@ pub fn parse_linestring<IN: ByteOrder, OUT: ByteOrder>(
     Ok(())
 }
 
-pub fn parse_multilinestring<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_multilinestring<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     metadata_reader: &mut Cursor<&[u8]>,
@@ -145,7 +145,6 @@ pub fn serialize_multilinestring<OUT: ByteOrder>(
 
         let _number_of_points = cursor.read_u32::<OUT>()?;
         total_number_of_points += _number_of_points;
-        // number_of_points+= _number_of_points;
         metadata_cursor.write_u32::<OUT>(_number_of_points)?;
 
         for _ in 0.._number_of_points * 2 {
diff --git a/rust/sedona-serde/src/point.rs b/rust/sedona-serde/src/point.rs
index 5ca83892..b8234cc1 100644
--- a/rust/sedona-serde/src/point.rs
+++ b/rust/sedona-serde/src/point.rs
@@ -33,7 +33,7 @@ fn get_byte_type_for_point(dimension: Dimension) -> u32 {
     }
 }
 
-pub fn write_empty_point<OUT: ByteOrder>(
+pub fn deserialize_empty_point<OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     dimension: Dimension,
 ) -> Result<()> {
@@ -46,7 +46,7 @@ pub fn write_empty_point<OUT: ByteOrder>(
     Ok(())
 }
 
-pub fn parse_point<OUT: ByteOrder>(
+pub fn deserialize_point<OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     dimension: Dimension,
@@ -65,7 +65,7 @@ pub fn parse_point<OUT: ByteOrder>(
     Ok(())
 }
 
-pub fn parse_multipoint<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_multipoint<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     dimension: Dimension,
@@ -90,7 +90,7 @@ pub fn parse_multipoint<IN: ByteOrder, OUT: ByteOrder>(
     builder.write_u32::<OUT>(number_of_points)?;
 
     for _ in 0..number_of_points {
-        parse_point::<OUT>(builder, cursor, dimension)?;
+        deserialize_point::<OUT>(builder, cursor, dimension)?;
     }
 
     Ok(())
diff --git a/rust/sedona-serde/src/polygon.rs b/rust/sedona-serde/src/polygon.rs
index 08fad2b7..8d218d96 100644
--- a/rust/sedona-serde/src/polygon.rs
+++ b/rust/sedona-serde/src/polygon.rs
@@ -30,7 +30,7 @@ pub(crate) fn get_polygon_marker(dimension: Dimension) -> u32 {
     }
 }
 
-pub fn parse_polygon<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_polygon<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     metadata_reader: &mut Cursor<&[u8]>,
@@ -57,7 +57,7 @@ pub fn parse_polygon<IN: ByteOrder, OUT: ByteOrder>(
     Ok(())
 }
 
-pub(crate) fn parse_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
+pub(crate) fn deserialize_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     cursor: &mut Cursor<&[u8]>,
     metadata_reader: &mut Cursor<&[u8]>,
@@ -80,13 +80,13 @@ pub(crate) fn parse_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
     builder.write_u32::<OUT>(number_of_geometries)?;
 
     for _ in 0..number_of_geometries {
-        parse_polygon::<IN, OUT>(builder, cursor, metadata_reader, dimension)?;
+        deserialize_polygon::<IN, OUT>(builder, cursor, metadata_reader, dimension)?;
     }
 
     Ok(())
 }
 
-pub(crate) fn write_empty_polygon<OUT: ByteOrder>(
+pub(crate) fn deserialize_empty_polygon<OUT: ByteOrder>(
     builder: &mut BinaryBuilder,
     dimension: Dimension,
 ) -> datafusion_common::Result<()> {
diff --git a/rust/sedona-serde/src/serialize.rs b/rust/sedona-serde/src/serialize.rs
index fe73a9c7..7d28440b 100644
--- a/rust/sedona-serde/src/serialize.rs
+++ b/rust/sedona-serde/src/serialize.rs
@@ -59,12 +59,12 @@ pub fn write_geometry<IN: ByteOrder, OUT: ByteOrder>(
     }
 
     match wkb_byte {
-        1 => return serialize_point::<LittleEndian>(builder, cursor),
-        2 => return serialize_linestring::<LittleEndian>(builder, cursor),
-        3 => return serialize_polygon::<LittleEndian>(builder, cursor),
-        4 => return serialize_multipoint::<LittleEndian>(builder, cursor),
-        5 => return serialize_multilinestring::<LittleEndian>(builder, cursor),
-        6 => return serialize_multipolygon::<LittleEndian>(builder, cursor),
+        1 => return serialize_point::<OUT>(builder, cursor),
+        2 => return serialize_linestring::<OUT>(builder, cursor),
+        3 => return serialize_polygon::<OUT>(builder, cursor),
+        4 => return serialize_multipoint::<OUT>(builder, cursor),
+        5 => return serialize_multilinestring::<OUT>(builder, cursor),
+        6 => return serialize_multipolygon::<OUT>(builder, cursor),
         7 => {
             let number_of_geometries = cursor.read_u32::<IN>()?;
             builder.write_u32::<OUT>(number_of_geometries)?;
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 82947460..ef95e96b 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -758,4 +758,74 @@ mod tests {
         .await
         .expect("should succeed because aws and gcs options were stripped");
     }
+
+    #[tokio::test]
+    async fn test_sedona_spark_serde() -> Result<()> {
+        let ctx = SedonaContext::new();
+
+        let geometry_data = ctx.sql(
+            "SELECT
+                ST_AsText(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS geom"
+        )
+        .await?
+        .collect()
+        .await?;
+
+        assert_batches_eq!(
+            [
+                "+------------+",
+                "| geom       |",
+                "+------------+",
+                "| POINT(1 1) |",
+                "+------------+",
+            ],
+            &geometry_data
+        );
+
+        let srid_value = ctx.sql(
+            "SELECT
+                ST_SRID(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS srid"
+        )
+            .await?
+            .collect()
+            .await?;
+
+        assert_batches_eq!(
+            [
+                "+------+",
+                "| srid |",
+                "+------+",
+                "| 4326 |",
+                "+------+",
+            ],
+            &srid_value
+        );
+
+        let from_sedona_spark_and_reverse = ctx.sql(
+            "SELECT
+                ST_GeomToSedonaSpark(
+                    ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+                ) AS sedona_bytes"
+        )
+            .await?
+            .collect()
+            .await?;
+
+        assert_batches_eq!(
+            [
+                "+--------------------------------------------------+",
+                "| sedona_bytes                                     |",
+                "+--------------------------------------------------+",
+                "| 130010e601000000000000000000f03f000000000000f03f |",
+                "+--------------------------------------------------+",
+            ],
+            &from_sedona_spark_and_reverse
+        );
+
+        Ok(())
+    }
 }

Reply via email to