This is an automated email from the ASF dual-hosted git repository.
imbruced pushed a commit to branch add-sedona-serializer
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/add-sedona-serializer by this push:
new c46b6bf3 restructure files
c46b6bf3 is described below
commit c46b6bf3bd402594382867188a376ca02ecc83d9
Author: pawelkocinski <[email protected]>
AuthorDate: Fri Dec 19 01:05:37 2025 +0100
restructure files
---
rust/sedona-functions/src/st_to_sedona_spark.rs | 2 +-
rust/sedona-serde/src/deserialize.rs | 34 +++++++-----
rust/sedona-serde/src/linestring.rs | 5 +-
rust/sedona-serde/src/point.rs | 8 +--
rust/sedona-serde/src/polygon.rs | 8 +--
rust/sedona-serde/src/serialize.rs | 12 ++---
rust/sedona/src/context.rs | 70 +++++++++++++++++++++++++
7 files changed, 107 insertions(+), 32 deletions(-)
diff --git a/rust/sedona-functions/src/st_to_sedona_spark.rs b/rust/sedona-functions/src/st_to_sedona_spark.rs
index e5a77e41..e50ec875 100644
--- a/rust/sedona-functions/src/st_to_sedona_spark.rs
+++ b/rust/sedona-functions/src/st_to_sedona_spark.rs
@@ -17,7 +17,7 @@ impl SedonaScalarKernel for STGeomToSedonaSpark {
fn return_type(&self, args: &[SedonaType]) -> datafusion_common::Result<Option<SedonaType>> {
let matcher = ArgMatcher::new(
vec![ArgMatcher::is_geometry()],
- SedonaType::Arrow(DataType::BinaryView),
+ SedonaType::Arrow(DataType::Binary),
);
matcher.match_args(args)
diff --git a/rust/sedona-serde/src/deserialize.rs b/rust/sedona-serde/src/deserialize.rs
index dd004412..ead99dd5 100644
--- a/rust/sedona-serde/src/deserialize.rs
+++ b/rust/sedona-serde/src/deserialize.rs
@@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::linestring::{parse_linestring, parse_multilinestring};
-use crate::point::{parse_multipoint, parse_point, write_empty_point};
-use crate::polygon::{parse_multipolygon, parse_polygon, write_empty_polygon};
+use crate::linestring::{deserialize_linestring, deserialize_multilinestring};
+use crate::point::{deserialize_multipoint, deserialize_point, deserialize_empty_point};
+use crate::polygon::{deserialize_multipolygon, deserialize_polygon, deserialize_empty_polygon};
use crate::wkb::write_wkb_byte_order_marker;
use arrow_array::builder::BinaryBuilder;
use byteorder::{ByteOrder, LittleEndian, ReadBytesExt, WriteBytesExt};
@@ -36,10 +36,10 @@ pub fn deserialize(builder: &mut BinaryBuilder, bytes: &[u8]) -> datafusion_comm
let mut reader = Cursor::new(bytes);
- parse_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes)
+ deserialize_geometry::<LittleEndian, LittleEndian>(builder, &mut reader, bytes)
}
-pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_geometry<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
bytes: &[u8],
@@ -50,6 +50,12 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
let dimension = get_dimension((preamble_byte) >> 1);
+ if dimension != Dimension::XY {
+ return Err(DataFusionError::Execution(
+ "Only 2D geometries (XY) are supported".to_string(),
+ ));
+ }
+
let _has_srid = (preamble_byte & 0x01) != 0;
cursor.set_position(cursor.position() + 3); // Skip 3 bytes
@@ -58,21 +64,21 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
1 => {
let number_of_coordinates = cursor.read_u32::<IN>()?;
if number_of_coordinates == 0 {
- write_empty_point::<OUT>(builder, dimension)?;
+ deserialize_empty_point::<OUT>(builder, dimension)?;
return Ok(());
}
- parse_point::<OUT>(builder, cursor, dimension)?;
+ deserialize_point::<OUT>(builder, cursor, dimension)?;
}
2 => {
- parse_linestring::<IN, OUT>(builder, cursor, dimension)?;
+ deserialize_linestring::<IN, OUT>(builder, cursor, dimension)?;
}
3 => {
let mut meta_data_reader = Cursor::new(bytes);
let number_of_points = cursor.read_u32::<IN>()?;
if number_of_points == 0 {
- write_empty_polygon::<OUT>(builder, dimension)?;
+ deserialize_empty_polygon::<OUT>(builder, dimension)?;
return Ok(());
}
@@ -80,20 +86,20 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
let metadata_start_position = number_of_points * 8 * 2;
meta_data_reader.set_position(cursor.position() + (metadata_start_position) as u64);
- parse_polygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+ deserialize_polygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
cursor.set_position(meta_data_reader.position());
}
4 => {
- parse_multipoint::<IN, OUT>(builder, cursor, dimension)?;
+ deserialize_multipoint::<IN, OUT>(builder, cursor, dimension)?;
}
5 => {
let mut meta_data_reader = Cursor::new(bytes);
- parse_multilinestring::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+ deserialize_multilinestring::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
cursor.set_position(meta_data_reader.position());
}
6 => {
let mut meta_data_reader = Cursor::new(bytes);
- parse_multipolygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
+ deserialize_multipolygon::<IN, OUT>(builder, cursor, &mut meta_data_reader, dimension)?;
cursor.set_position(meta_data_reader.position());
}
7 => {
@@ -104,7 +110,7 @@ pub fn parse_geometry<IN: ByteOrder, OUT: ByteOrder>(
builder.write_u32::<OUT>(number_of_geometries)?;
for _i in 0..number_of_geometries {
- parse_geometry::<IN, OUT>(builder, cursor, bytes)?;
+ deserialize_geometry::<IN, OUT>(builder, cursor, bytes)?;
}
}
_ => {
diff --git a/rust/sedona-serde/src/linestring.rs b/rust/sedona-serde/src/linestring.rs
index 3fdf6b6e..14f630a0 100644
--- a/rust/sedona-serde/src/linestring.rs
+++ b/rust/sedona-serde/src/linestring.rs
@@ -30,7 +30,7 @@ fn get_linestring_marker(dimension: Dimension) -> u32 {
}
}
-pub fn parse_linestring<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_linestring<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
dimension: Dimension,
@@ -53,7 +53,7 @@ pub fn parse_linestring<IN: ByteOrder, OUT: ByteOrder>(
Ok(())
}
-pub fn parse_multilinestring<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_multilinestring<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
metadata_reader: &mut Cursor<&[u8]>,
@@ -145,7 +145,6 @@ pub fn serialize_multilinestring<OUT: ByteOrder>(
let _number_of_points = cursor.read_u32::<OUT>()?;
total_number_of_points += _number_of_points;
- // number_of_points+= _number_of_points;
metadata_cursor.write_u32::<OUT>(_number_of_points)?;
for _ in 0.._number_of_points * 2 {
diff --git a/rust/sedona-serde/src/point.rs b/rust/sedona-serde/src/point.rs
index 5ca83892..b8234cc1 100644
--- a/rust/sedona-serde/src/point.rs
+++ b/rust/sedona-serde/src/point.rs
@@ -33,7 +33,7 @@ fn get_byte_type_for_point(dimension: Dimension) -> u32 {
}
}
-pub fn write_empty_point<OUT: ByteOrder>(
+pub fn deserialize_empty_point<OUT: ByteOrder>(
builder: &mut BinaryBuilder,
dimension: Dimension,
) -> Result<()> {
@@ -46,7 +46,7 @@ pub fn write_empty_point<OUT: ByteOrder>(
Ok(())
}
-pub fn parse_point<OUT: ByteOrder>(
+pub fn deserialize_point<OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
dimension: Dimension,
@@ -65,7 +65,7 @@ pub fn parse_point<OUT: ByteOrder>(
Ok(())
}
-pub fn parse_multipoint<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_multipoint<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
dimension: Dimension,
@@ -90,7 +90,7 @@ pub fn parse_multipoint<IN: ByteOrder, OUT: ByteOrder>(
builder.write_u32::<OUT>(number_of_points)?;
for _ in 0..number_of_points {
- parse_point::<OUT>(builder, cursor, dimension)?;
+ deserialize_point::<OUT>(builder, cursor, dimension)?;
}
Ok(())
diff --git a/rust/sedona-serde/src/polygon.rs b/rust/sedona-serde/src/polygon.rs
index 08fad2b7..8d218d96 100644
--- a/rust/sedona-serde/src/polygon.rs
+++ b/rust/sedona-serde/src/polygon.rs
@@ -30,7 +30,7 @@ pub(crate) fn get_polygon_marker(dimension: Dimension) -> u32 {
}
}
-pub fn parse_polygon<IN: ByteOrder, OUT: ByteOrder>(
+pub fn deserialize_polygon<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
metadata_reader: &mut Cursor<&[u8]>,
@@ -57,7 +57,7 @@ pub fn parse_polygon<IN: ByteOrder, OUT: ByteOrder>(
Ok(())
}
-pub(crate) fn parse_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
+pub(crate) fn deserialize_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
builder: &mut BinaryBuilder,
cursor: &mut Cursor<&[u8]>,
metadata_reader: &mut Cursor<&[u8]>,
@@ -80,13 +80,13 @@ pub(crate) fn parse_multipolygon<IN: ByteOrder, OUT: ByteOrder>(
builder.write_u32::<OUT>(number_of_geometries)?;
for _ in 0..number_of_geometries {
- parse_polygon::<IN, OUT>(builder, cursor, metadata_reader, dimension)?;
+ deserialize_polygon::<IN, OUT>(builder, cursor, metadata_reader, dimension)?;
}
Ok(())
}
-pub(crate) fn write_empty_polygon<OUT: ByteOrder>(
+pub(crate) fn deserialize_empty_polygon<OUT: ByteOrder>(
builder: &mut BinaryBuilder,
dimension: Dimension,
) -> datafusion_common::Result<()> {
diff --git a/rust/sedona-serde/src/serialize.rs b/rust/sedona-serde/src/serialize.rs
index fe73a9c7..7d28440b 100644
--- a/rust/sedona-serde/src/serialize.rs
+++ b/rust/sedona-serde/src/serialize.rs
@@ -59,12 +59,12 @@ pub fn write_geometry<IN: ByteOrder, OUT: ByteOrder>(
}
match wkb_byte {
- 1 => return serialize_point::<LittleEndian>(builder, cursor),
- 2 => return serialize_linestring::<LittleEndian>(builder, cursor),
- 3 => return serialize_polygon::<LittleEndian>(builder, cursor),
- 4 => return serialize_multipoint::<LittleEndian>(builder, cursor),
- 5 => return serialize_multilinestring::<LittleEndian>(builder, cursor),
- 6 => return serialize_multipolygon::<LittleEndian>(builder, cursor),
+ 1 => return serialize_point::<OUT>(builder, cursor),
+ 2 => return serialize_linestring::<OUT>(builder, cursor),
+ 3 => return serialize_polygon::<OUT>(builder, cursor),
+ 4 => return serialize_multipoint::<OUT>(builder, cursor),
+ 5 => return serialize_multilinestring::<OUT>(builder, cursor),
+ 6 => return serialize_multipolygon::<OUT>(builder, cursor),
7 => {
let number_of_geometries = cursor.read_u32::<IN>()?;
builder.write_u32::<OUT>(number_of_geometries)?;
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 82947460..ef95e96b 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -758,4 +758,74 @@ mod tests {
.await
.expect("should succeed because aws and gcs options were stripped");
}
+
+ #[tokio::test]
+ async fn test_sedona_spark_serde() -> Result<()> {
+ let ctx = SedonaContext::new();
+
+ let geometry_data = ctx.sql(
+ "SELECT
+ ST_AsText(
+ ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+ ) AS geom"
+ )
+ .await?
+ .collect()
+ .await?;
+
+ assert_batches_eq!(
+ [
+ "+------------+",
+ "| geom |",
+ "+------------+",
+ "| POINT(1 1) |",
+ "+------------+",
+ ],
+ &geometry_data
+ );
+
+ let srid_value = ctx.sql(
+ "SELECT
+ ST_SRID(
+ ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+ ) AS srid"
+ )
+ .await?
+ .collect()
+ .await?;
+
+ assert_batches_eq!(
+ [
+ "+------+",
+ "| srid |",
+ "+------+",
+ "| 4326 |",
+ "+------+",
+ ],
+ &srid_value
+ );
+
+ let from_sedona_spark_and_reverse = ctx.sql(
+ "SELECT
+ ST_GeomToSedonaSpark(
+ ST_GeomFromSedonaSpark(X'1200000001000000000000000000F03F000000000000F03F', 'EPSG:4326')
+ ) AS sedona_bytes"
+ )
+ .await?
+ .collect()
+ .await?;
+
+ assert_batches_eq!(
+ [
+ "+--------------------------------------------------+",
+ "| sedona_bytes |",
+ "+--------------------------------------------------+",
+ "| 130010e601000000000000000000f03f000000000000f03f |",
+ "+--------------------------------------------------+",
+ ],
+ &from_sedona_spark_and_reverse
+ );
+
+ Ok(())
+ }
}