This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 2afc1b9b feat(rust/sedona-functions): Implement ordering framework
that includes geometry/geography (#360)
2afc1b9b is described below
commit 2afc1b9babc82c5e1adc93076d6c4b75c3cf0111
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Nov 26 14:01:52 2025 -0600
feat(rust/sedona-functions): Implement ordering framework that includes
geometry/geography (#360)
Co-authored-by: Copilot <[email protected]>
---
c/sedona-geos/src/st_minimumclearance_line.rs | 4 +-
c/sedona-proj/src/lib.rs | 1 +
c/sedona-proj/src/sd_order_lnglat.rs | 205 +++++++++++++++++++++
c/sedona-proj/src/st_transform.rs | 4 +-
c/sedona-s2geography/src/geography_glue.cc | 8 +
c/sedona-s2geography/src/geography_glue.h | 2 +
c/sedona-s2geography/src/geography_glue_bindgen.rs | 1 +
c/sedona-s2geography/src/s2geography.rs | 26 +++
python/sedonadb/tests/functions/test_order.py | 48 +++++
rust/sedona-functions/src/executor.rs | 3 +-
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/sd_order.rs | 118 ++++++++++++
rust/sedona-schema/src/crs.rs | 37 ++++
rust/sedona/src/context.rs | 18 +-
15 files changed, 470 insertions(+), 7 deletions(-)
diff --git a/c/sedona-geos/src/st_minimumclearance_line.rs
b/c/sedona-geos/src/st_minimumclearance_line.rs
index 6af8ceb7..313bf54d 100644
--- a/c/sedona-geos/src/st_minimumclearance_line.rs
+++ b/c/sedona-geos/src/st_minimumclearance_line.rs
@@ -113,7 +113,7 @@ mod tests {
Some("POLYGON((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1))"),
Some("POLYGON((0 0,0 1,0 1,1 1,1 0,0 0,0 0))"),
Some("LINESTRING (0 0, 1 1, 2 2)"),
- Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3 3,3 2,2.5 2.5)))"),
+ Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3.5 3.5,3.5 1.5,2.5 2.5)))"),
Some("POINT (1 1)"),
Some("GEOMETRYCOLLECTION(POINT(1 1),MULTIPOLYGON(((0 2,1 1,0 0,0
2)),((2 0,1 1,2 2,2 0))))"),
Some("POLYGON EMPTY"),
@@ -128,7 +128,7 @@ mod tests {
Some("LINESTRING(1 1,1 2)"),
Some("LINESTRING(0 0,0 1)"),
Some("LINESTRING(0 0,1 1)"),
- Some("LINESTRING(2.5 2.5,3 2.5)"),
+ Some("LINESTRING(2.5 2.5,2 2.5)"),
Some("LINESTRING EMPTY"),
Some("LINESTRING(1 1,2 1)"),
Some("LINESTRING EMPTY"),
diff --git a/c/sedona-proj/src/lib.rs b/c/sedona-proj/src/lib.rs
index 7dc01963..914bb031 100644
--- a/c/sedona-proj/src/lib.rs
+++ b/c/sedona-proj/src/lib.rs
@@ -18,5 +18,6 @@ pub mod error;
mod proj;
mod proj_dyn_bindgen;
pub mod register;
+pub mod sd_order_lnglat;
mod st_transform;
pub mod transform;
diff --git a/c/sedona-proj/src/sd_order_lnglat.rs
b/c/sedona-proj/src/sd_order_lnglat.rs
new file mode 100644
index 00000000..7277bb6e
--- /dev/null
+++ b/c/sedona-proj/src/sd_order_lnglat.rs
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{fmt::Debug, sync::Arc};
+
+use arrow_array::builder::UInt64Builder;
+use arrow_schema::DataType;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use sedona_expr::scalar_udf::SedonaScalarKernel;
+use sedona_functions::executor::WkbBytesExecutor;
+use sedona_geometry::{transform::CrsEngine, wkb_header::WkbHeader};
+use sedona_schema::{crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::st_transform::with_global_proj_engine;
+
+/// Generic scalar kernel for sd_order based on the first coordinate
+/// of a geometry projected to lon/lat
+///
+/// This [SedonaScalarKernel] requires the actual function (e.g., S2, H3,
+/// or A5 cell identifier) to be provided but takes care of the extraction
+/// of the first coordinate and projecting to lon/lat space. The provided
+/// function must return a `u64`.
+pub struct OrderLngLat<F> {
+ order_fn: F,
+}
+
+impl<F: Fn((f64, f64)) -> u64> OrderLngLat<F> {
+ /// Create a new kernel from the required function type
+ pub fn new(order_fn: F) -> Self {
+ Self { order_fn }
+ }
+}
+
+impl<F: Fn((f64, f64)) -> u64> Debug for OrderLngLat<F> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("OrderLngLat").finish()
+ }
+}
+
+impl<F: Fn((f64, f64)) -> u64> SedonaScalarKernel for OrderLngLat<F> {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry_or_geography()],
+ SedonaType::Arrow(DataType::UInt64),
+ );
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ // Extract the source CRS, checking for lon/lat to see if we can avoid
+ // a transformation. If the CRS is missing we also skip any particular
+ // transform (although the resulting sort may not be effective).
+ let maybe_src_crs = match &arg_types[0] {
+ SedonaType::Wkb(_, maybe_crs) | SedonaType::WkbView(_, maybe_crs)
+ if maybe_crs != &lnglat() =>
+ {
+ maybe_crs.as_ref().map(|crs| crs.to_crs_string())
+ }
+ _ => None,
+ };
+
+ let executor = WkbBytesExecutor::new(arg_types, args);
+ let mut builder =
UInt64Builder::with_capacity(executor.num_iterations());
+
+ // If we have a source CRS (i.e., the source CRS was present and not
lon/lat already)
+ // resolve the transform and apply it to the first coord before
applying the order_fn.
+ // Otherwise, skip the transform and go straight to the order_fn. This
approach allows
+ // this to be used even if PROJ isn't available (as long as the data
were lon/lat
+ // already).
+ if let Some(src_crs) = maybe_src_crs {
+ with_global_proj_engine(|engine| {
+ let to_lnglat = engine
+ .get_transform_crs_to_crs(&src_crs, "OGC:CRS84", None, "")
+ .map_err(|e| DataFusionError::Execution(format!("{e}")))?;
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb_bytes) => {
+ let header = WkbHeader::try_new(wkb_bytes)
+ .map_err(|e|
DataFusionError::Execution(format!("{e}")))?;
+ let mut first_xy = header.first_xy();
+ to_lnglat
+ .transform_coord(&mut first_xy)
+ .map_err(|e|
DataFusionError::Execution(format!("{e}")))?;
+ let order = (self.order_fn)(first_xy);
+ builder.append_value(order);
+ }
+ None => builder.append_null(),
+ }
+
+ Ok(())
+ })?;
+
+ Ok(())
+ })?;
+ } else {
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb_bytes) => {
+ let header = WkbHeader::try_new(wkb_bytes)
+ .map_err(|e|
DataFusionError::Execution(format!("{e}")))?;
+ let first_xy = header.first_xy();
+ let order = (self.order_fn)(first_xy);
+ builder.append_value(order);
+ }
+ None => builder.append_null(),
+ }
+
+ Ok(())
+ })?;
+ }
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+#[cfg(test)]
+mod test {
+
+ use arrow_array::{create_array, ArrayRef};
+ use sedona_expr::scalar_udf::SedonaScalarUDF;
+ use sedona_schema::datatypes::{Edges, WKB_GEOMETRY};
+ use sedona_testing::{create::create_array, testers::ScalarUdfTester};
+ use serde_json::Value;
+
+ use super::*;
+
+ #[test]
+ fn order_geometry() {
+ // For testing, sort by first (rounded) x value
+ let kernel = OrderLngLat::new(|(lng, lat)| {
+ if lng.is_nan() || lat.is_nan() {
+ u64::MAX
+ } else {
+ lng as u64
+ }
+ });
+ let udf = SedonaScalarUDF::from_kernel("sd_order", Arc::new(kernel));
+
+ let array = create_array(
+ &[
+ // POINT (1 2) in EPSG:3857
+ Some("POINT (111320 222685)"),
+ Some("POINT EMPTY"),
+ None,
+ // POINT (0 1) in EPSG:3857
+ Some("POINT (0 111326)"),
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ // Check the None Crs case
+ let tester = ScalarUdfTester::new(udf.clone().into(),
vec![WKB_GEOMETRY]);
+ tester.assert_return_type(DataType::UInt64);
+
+ let result = tester.invoke_array(array.clone()).unwrap();
+ let expected =
+ create_array!(UInt64, [Some(111320), Some(u64::MAX), None,
Some(0)]) as ArrayRef;
+ assert_eq!(&result, &expected);
+
+ // Check the "already lnglat" case
+ let tester = ScalarUdfTester::new(
+ udf.clone().into(),
+ vec![SedonaType::Wkb(Edges::Planar, lnglat())],
+ );
+ tester.assert_return_type(DataType::UInt64);
+
+ let result = tester.invoke_array(array.clone()).unwrap();
+ let expected =
+ create_array!(UInt64, [Some(111320), Some(u64::MAX), None,
Some(0)]) as ArrayRef;
+ assert_eq!(&result, &expected);
+
+ // Check the "not already lnglat" case
+ let crs =
+
sedona_schema::crs::deserialize_crs(&Value::String("EPSG:3857".to_string())).unwrap();
+ let tester = ScalarUdfTester::new(
+ udf.clone().into(),
+ vec![SedonaType::Wkb(Edges::Planar, crs)],
+ );
+ tester.assert_return_type(DataType::UInt64);
+
+ let result = tester.invoke_array(array.clone()).unwrap();
+ let expected = create_array!(UInt64, [Some(1), Some(u64::MAX), None,
Some(0)]) as ArrayRef;
+ assert_eq!(&result, &expected);
+ }
+}
diff --git a/c/sedona-proj/src/st_transform.rs
b/c/sedona-proj/src/st_transform.rs
index 97f0781c..8e31dfb9 100644
--- a/c/sedona-proj/src/st_transform.rs
+++ b/c/sedona-proj/src/st_transform.rs
@@ -65,7 +65,7 @@ pub fn configure_global_proj_engine(builder:
ProjCrsEngineBuilder) -> Result<()>
/// Do something with the global thread-local PROJ engine, creating it if it
has not
/// already been created.
-fn with_global_proj_engine(
+pub(crate) fn with_global_proj_engine(
mut func: impl FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<()>,
) -> Result<()> {
PROJ_ENGINE.with(|engine_cell| {
@@ -293,7 +293,7 @@ fn invoke_scalar(wkb: &Wkb, trans: &dyn CrsTransform,
builder: &mut BinaryBuilde
fn parse_source_crs(source_type: &SedonaType) -> Result<Option<String>> {
match source_type {
SedonaType::Wkb(_, Some(crs)) | SedonaType::WkbView(_, Some(crs)) => {
- crs.to_authority_code()
+ Ok(Some(crs.to_crs_string()))
}
_ => Ok(None),
}
diff --git a/c/sedona-s2geography/src/geography_glue.cc
b/c/sedona-s2geography/src/geography_glue.cc
index 9a5cf44a..13f489cd 100644
--- a/c/sedona-s2geography/src/geography_glue.cc
+++ b/c/sedona-s2geography/src/geography_glue.cc
@@ -69,6 +69,14 @@ double SedonaGeographyGlueTestLinkage(void) {
return S2Earth::RadiusMeters() * s2_distance(index1, index2);
}
+uint64_t SedonaGeographyGlueLngLatToCellId(double lng, double lat) {
+ if (std::isnan(lng) || std::isnan(lat)) {
+ return S2CellId::Sentinel().id();
+ } else {
+ return S2CellId(S2LatLng::FromDegrees(lat,
lng).Normalized().ToPoint()).id();
+ }
+}
+
struct UdfExporter {
static void Export(std::unique_ptr<s2geography::arrow_udf::ArrowUDF> udf,
struct SedonaGeographyArrowUdf* out) {
diff --git a/c/sedona-s2geography/src/geography_glue.h
b/c/sedona-s2geography/src/geography_glue.h
index 5028eb00..1d37ca32 100644
--- a/c/sedona-s2geography/src/geography_glue.h
+++ b/c/sedona-s2geography/src/geography_glue.h
@@ -43,6 +43,8 @@ const char* SedonaGeographyGlueAbseilVersion(void);
double SedonaGeographyGlueTestLinkage(void);
+uint64_t SedonaGeographyGlueLngLatToCellId(double lng, double lat);
+
struct SedonaGeographyArrowUdf {
int (*init)(struct SedonaGeographyArrowUdf* self, struct ArrowSchema*
arg_schema,
const char* options, struct ArrowSchema* out);
diff --git a/c/sedona-s2geography/src/geography_glue_bindgen.rs
b/c/sedona-s2geography/src/geography_glue_bindgen.rs
index 0865b881..45e09b7b 100644
--- a/c/sedona-s2geography/src/geography_glue_bindgen.rs
+++ b/c/sedona-s2geography/src/geography_glue_bindgen.rs
@@ -69,6 +69,7 @@ unsafe extern "C" {
pub fn SedonaGeographyGlueS2GeometryVersion() -> *const c_char;
pub fn SedonaGeographyGlueAbseilVersion() -> *const c_char;
pub fn SedonaGeographyGlueTestLinkage() -> f64;
+ pub fn SedonaGeographyGlueLngLatToCellId(lng: f64, lat: f64) -> u64;
declare_s2_c_udfs!(
Area,
diff --git a/c/sedona-s2geography/src/s2geography.rs
b/c/sedona-s2geography/src/s2geography.rs
index f02b6353..f2a82da2 100644
--- a/c/sedona-s2geography/src/s2geography.rs
+++ b/c/sedona-s2geography/src/s2geography.rs
@@ -24,6 +24,16 @@ use arrow_schema::{ArrowError, Fields, Schema};
use crate::{error::S2GeographyError, geography_glue_bindgen::*};
+/// Compute an S2 Cell identifier from a longitude/latitude pair
+///
+/// If either longitude or latitude is NaN (e.g., an empty point),
+/// the sentinel cell (`u64::MAX`) is returned. Lon/Lat pairs are
+/// normalized such that invalid lon/lat pairs will still compute
+/// a result (even though that result may be difficult to interpret).
+pub fn s2_cell_id_from_lnglat(lnglat: (f64, f64)) -> u64 {
+ unsafe { SedonaGeographyGlueLngLatToCellId(lnglat.0, lnglat.1) }
+}
+
/// Wrapper for scalar UDFs exposed by s2geography::arrow_udf
///
/// Provides a minimal wrapper around the C callables that define
@@ -258,6 +268,22 @@ mod test {
use super::*;
+ #[test]
+ fn test_s2_cell_id_from_lnglat() {
+ // Check a single, finite cell
+ assert_eq!(s2_cell_id_from_lnglat((0.0, 0.0)), 1152921504606846977);
+
+ // Emptyish cases should return the sentinel cell
+ assert_eq!(s2_cell_id_from_lnglat((f64::NAN, 0.0)), u64::MAX);
+ assert_eq!(s2_cell_id_from_lnglat((0.0, f64::NAN)), u64::MAX);
+ assert_eq!(s2_cell_id_from_lnglat((f64::NAN, f64::NAN)), u64::MAX);
+
+ // These should both return something (even if what it returns is
difficult
+ // to interpret)
+ assert_ne!(s2_cell_id_from_lnglat((181.0, 0.0)), u64::MAX);
+ assert_ne!(s2_cell_id_from_lnglat((0.0, 91.0)), u64::MAX);
+ }
+
#[test]
fn scalar_udf() {
let mut udf = S2ScalarUDF::Length();
diff --git a/python/sedonadb/tests/functions/test_order.py
b/python/sedonadb/tests/functions/test_order.py
new file mode 100644
index 00000000..4e96d1a4
--- /dev/null
+++ b/python/sedonadb/tests/functions/test_order.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import geopandas
+import geopandas.testing
+
+
+def test_order_sql(con):
+ wkt_unsorted = [
+ None,
+ "POINT EMPTY",
+ "POINT (-80 -80)",
+ "POINT (80 80)",
+ "POINT (-79 -79)",
+ ]
+ wkt_sorted = [
+ "POINT (80 80)",
+ "POINT (-80 -80)",
+ "POINT (-79 -79)",
+ "POINT EMPTY",
+ None,
+ ]
+ gdf_unsorted = geopandas.GeoDataFrame(
+ {"geometry": geopandas.GeoSeries.from_wkt(wkt_unsorted)}
+ )
+ gdf_sorted = geopandas.GeoDataFrame(
+ {"geometry": geopandas.GeoSeries.from_wkt(wkt_sorted)}
+ )
+
+ con.create_data_frame(gdf_unsorted).to_view("unsorted", overwrite=True)
+ hopefully_sorted = con.sql(
+ "SELECT * FROM unsorted ORDER BY sd_order(geometry)"
+ ).to_pandas()
+ geopandas.testing.assert_geodataframe_equal(hopefully_sorted, gdf_sorted)
diff --git a/rust/sedona-functions/src/executor.rs
b/rust/sedona-functions/src/executor.rs
index 0a98e71d..0686070f 100644
--- a/rust/sedona-functions/src/executor.rs
+++ b/rust/sedona-functions/src/executor.rs
@@ -267,8 +267,7 @@ impl GeometryFactory for WkbBytesFactory {
/// This [GenericExecutor] implementation provides more optimization
opportunities,
/// but it requires additional manual processing of the raw [Wkb] bytes
compared to
/// the [WkbExecutor].
-pub(crate) type WkbBytesExecutor<'a, 'b> =
- GenericExecutor<'a, 'b, WkbBytesFactory, WkbBytesFactory>;
+pub type WkbBytesExecutor<'a, 'b> = GenericExecutor<'a, 'b, WkbBytesFactory,
WkbBytesFactory>;
/// Trait for iterating over a container type as geometry scalars
///
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index a6003043..a7488716 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -22,6 +22,7 @@ mod predicates;
mod referencing;
pub mod register;
mod sd_format;
+pub mod sd_order;
pub mod st_analyze_agg;
mod st_area;
mod st_asbinary;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index cdae47b8..2b9ecb39 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -61,6 +61,7 @@ pub fn default_function_set() -> FunctionSet {
crate::referencing::st_line_interpolate_point_udf,
crate::referencing::st_line_locate_point_udf,
crate::sd_format::sd_format_udf,
+ crate::sd_order::sd_order_udf,
crate::st_area::st_area_udf,
crate::st_asbinary::st_asbinary_udf,
crate::st_astext::st_astext_udf,
diff --git a/rust/sedona-functions/src/sd_order.rs
b/rust/sedona-functions/src/sd_order.rs
new file mode 100644
index 00000000..8419b57a
--- /dev/null
+++ b/rust/sedona-functions/src/sd_order.rs
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::Result;
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_schema::datatypes::SedonaType;
+use std::{fmt::Debug, sync::Arc};
+
+/// SD_Order() scalar UDF implementation
+///
+/// This function is invoked to obtain a proxy array whose order may be used
+/// to sort based on the value. The default implementation returns the value
+/// and a utility is provided to order geometry and/or geographies based on
+/// the first coordinate. More sophisticated sorting (e.g., XZ2) may be added
+/// in the future.
+pub fn sd_order_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "sd_order",
+ vec![Arc::new(SDOrderDefault {})],
+ Volatility::Immutable,
+ Some(sd_order_doc()),
+ )
+}
+
+fn sd_order_doc() -> Documentation {
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Return an arbitrary value that may be used to sort the input.",
+ "SD_Order (value: Any)",
+ )
+ .with_argument("value", "Any: An arbitrary value")
+ .with_sql_example("SELECT * FROM tbl ORDER BY SD_Order(geometry)")
+ .build()
+}
+
+/// Default implementation that returns its input (i.e., by default, just
+/// do whatever DataFusion would have done with the value)
+#[derive(Debug)]
+struct SDOrderDefault {}
+
+impl SedonaScalarKernel for SDOrderDefault {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ if args.len() != 1 {
+ return Ok(None);
+ }
+
+ Ok(Some(args[0].clone()))
+ }
+
+ fn invoke_batch(
+ &self,
+ _arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ Ok(args[0].clone())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow_array::{create_array, ArrayRef};
+ use arrow_schema::DataType;
+ use datafusion_common::ScalarValue;
+ use datafusion_expr::ScalarUDF;
+ use rstest::rstest;
+ use sedona_schema::datatypes::SedonaType;
+ use sedona_testing::testers::ScalarUdfTester;
+
+ #[test]
+ fn udf_metadata() {
+ let udf: ScalarUDF = sd_order_udf().into();
+ assert_eq!(udf.name(), "sd_order");
+ assert!(udf.documentation().is_some())
+ }
+
+ #[rstest]
+ fn order_not_geometry(
+ #[values(
+ SedonaType::Arrow(DataType::Utf8),
+ SedonaType::Arrow(DataType::LargeUtf8)
+ )]
+ sedona_type: SedonaType,
+ ) {
+ let udf = sd_order_udf();
+ let tester = ScalarUdfTester::new(udf.clone().into(),
vec![sedona_type.clone()]);
+ tester.assert_return_type(sedona_type.clone());
+
+ tester.assert_scalar_result_equals("foofy", "foofy");
+ tester.assert_scalar_result_equals(ScalarValue::Null,
ScalarValue::Null);
+
+ let array: ArrayRef = create_array!(Utf8, [Some("foofy"), None,
Some("other foofy")]);
+ let array_casted = ColumnarValue::Array(array)
+ .cast_to(sedona_type.storage_type(), None)
+ .unwrap()
+ .to_array(3)
+ .unwrap();
+ let result = tester.invoke_array(array_casted.clone()).unwrap();
+ assert_eq!(&array_casted, &result);
+ }
+}
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index 72cd727e..3d73e354 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -97,10 +97,37 @@ impl PartialEq<dyn CoordinateReferenceSystem + Send + Sync>
/// A trait defining the minimum required properties of a concrete coordinate
/// reference system, allowing the details of this to be implemented elsewhere.
pub trait CoordinateReferenceSystem: Debug {
+ /// Compute the representation of this Crs in the form required for JSON
output
+ ///
+ /// The output must be valid JSON (e.g., arbitrary strings must be quoted).
fn to_json(&self) -> String;
+
+ /// Compute the representation of this Crs as a string in the form
Authority:Code
+ ///
+ /// If there is no such representation, returns None.
fn to_authority_code(&self) -> Result<Option<String>>;
+
+ /// Compute CRS equality
+ ///
+ /// CRS equality is a relatively thorny topic and can be difficult to
compute;
+ /// however, this method should try to compare self and other on value
(e.g.,
+ /// comparing authority_code where possible).
fn crs_equals(&self, other: &dyn CoordinateReferenceSystem) -> bool;
+
+ /// Convert this CRS representation to an integer SRID if possible.
+ ///
+ /// For the purposes of this trait, an SRID is always equivalent to the
+ /// authority_code `"EPSG:{srid}"`. Note that other SRID representations
+ /// (e.g., GeoArrow, Parquet GEOMETRY/GEOGRAPHY) do not make any guarantees
+ /// that an SRID comes from the EPSG authority.
fn srid(&self) -> Result<Option<u32>>;
+
+ /// Compute a CRS string representation
+ ///
+ /// Unlike `to_json()`, arbitrary string values returned by this method
should
+ /// not be escaped. This is the representation expected as input to PROJ,
GDAL,
+ /// and Parquet GEOMETRY/GEOGRAPHY representations of CRS.
+ fn to_crs_string(&self) -> String;
}
/// Concrete implementation of a default longitude/latitude coordinate
reference system
@@ -235,6 +262,10 @@ impl CoordinateReferenceSystem for AuthorityCode {
Ok(None)
}
}
+
+ fn to_crs_string(&self) -> String {
+ format!("{}:{}", self.authority, self.code)
+ }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -313,6 +344,10 @@ impl CoordinateReferenceSystem for ProjJSON {
Ok(None)
}
+
+ fn to_crs_string(&self) -> String {
+ self.to_json()
+ }
}
pub const OGC_CRS84_PROJJSON: &str =
r#"{"$schema":"https://proj.org/schemas/v0.7/projjson.schema.json","type":"GeographicCRS","name":"WGS
84 (CRS84)","datum_ensemble":{"name":"World Geodetic System 1984
ensemble","members":[{"name":"World Geodetic System 1984
(Transit)","id":{"authority":"EPSG","code":1166}},{"name":"World Geodetic
System 1984 (G730)","id":{"authority":"EPSG","code":1152}},{"name":"World
Geodetic System 1984
(G873)","id":{"authority":"EPSG","code":1153}},{"name":"World [...]
@@ -347,6 +382,7 @@ mod test {
let projjson = OGC_CRS84_PROJJSON.parse::<ProjJSON>().unwrap();
assert_eq!(projjson.to_authority_code().unwrap().unwrap(),
"OGC:CRS84");
assert_eq!(projjson.srid().unwrap(), Some(4326));
+ assert_eq!(projjson.to_json(), projjson.to_crs_string());
let json_value: Value =
serde_json::from_str(OGC_CRS84_PROJJSON).unwrap();
let json_value_roundtrip: Value =
serde_json::from_str(&projjson.to_json()).unwrap();
@@ -376,6 +412,7 @@ mod test {
assert!(auth_code.crs_equals(&auth_code));
assert!(!auth_code.crs_equals(LngLat::crs().unwrap().as_ref()));
assert_eq!(auth_code.srid().unwrap(), Some(4269));
+ assert_eq!(auth_code.to_crs_string(), "EPSG:4269");
assert_eq!(
auth_code.to_authority_code().unwrap(),
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 7723c20f..82947460 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -151,7 +151,7 @@ impl SedonaContext {
// Register s2geography scalar kernels if built with s2geography
support
#[cfg(feature = "s2geography")]
-
out.register_scalar_kernels(sedona_s2geography::register::scalar_kernels().into_iter())?;
+ out.register_s2geography()?;
// Always register proj scalar kernels (although actually calling them
will error
// without this feature unless
sedona_proj::register::configure_global_proj_engine()
@@ -164,6 +164,22 @@ impl SedonaContext {
Ok(out)
}
+ #[cfg(feature = "s2geography")]
+ fn register_s2geography(&mut self) -> Result<()> {
+ use sedona_proj::sd_order_lnglat;
+
+
self.register_scalar_kernels(sedona_s2geography::register::scalar_kernels().into_iter())?;
+
+ let sd_order_kernel = sd_order_lnglat::OrderLngLat::new(
+ sedona_s2geography::s2geography::s2_cell_id_from_lnglat,
+ );
+ self.register_scalar_kernels(
+ [("sd_order", Arc::new(sd_order_kernel) as
ScalarKernelRef)].into_iter(),
+ )?;
+
+ Ok(())
+ }
+
/// Register all functions in a [FunctionSet] with this context
pub fn register_function_set(&mut self, function_set: FunctionSet) {
for udf in function_set.scalar_udfs() {