This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 2afc1b9b feat(rust/sedona-functions): Implement ordering framework 
that includes geometry/geography (#360)
2afc1b9b is described below

commit 2afc1b9babc82c5e1adc93076d6c4b75c3cf0111
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Nov 26 14:01:52 2025 -0600

    feat(rust/sedona-functions): Implement ordering framework that includes 
geometry/geography (#360)
    
    Co-authored-by: Copilot <[email protected]>
---
 c/sedona-geos/src/st_minimumclearance_line.rs      |   4 +-
 c/sedona-proj/src/lib.rs                           |   1 +
 c/sedona-proj/src/sd_order_lnglat.rs               | 205 +++++++++++++++++++++
 c/sedona-proj/src/st_transform.rs                  |   4 +-
 c/sedona-s2geography/src/geography_glue.cc         |   8 +
 c/sedona-s2geography/src/geography_glue.h          |   2 +
 c/sedona-s2geography/src/geography_glue_bindgen.rs |   1 +
 c/sedona-s2geography/src/s2geography.rs            |  26 +++
 python/sedonadb/tests/functions/test_order.py      |  48 +++++
 rust/sedona-functions/src/executor.rs              |   3 +-
 rust/sedona-functions/src/lib.rs                   |   1 +
 rust/sedona-functions/src/register.rs              |   1 +
 rust/sedona-functions/src/sd_order.rs              | 118 ++++++++++++
 rust/sedona-schema/src/crs.rs                      |  37 ++++
 rust/sedona/src/context.rs                         |  18 +-
 15 files changed, 470 insertions(+), 7 deletions(-)

diff --git a/c/sedona-geos/src/st_minimumclearance_line.rs 
b/c/sedona-geos/src/st_minimumclearance_line.rs
index 6af8ceb7..313bf54d 100644
--- a/c/sedona-geos/src/st_minimumclearance_line.rs
+++ b/c/sedona-geos/src/st_minimumclearance_line.rs
@@ -113,7 +113,7 @@ mod tests {
             Some("POLYGON((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1))"),
             Some("POLYGON((0 0,0 1,0 1,1 1,1 0,0 0,0 0))"),
             Some("LINESTRING (0 0, 1 1, 2 2)"),
-            Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1 
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3 3,3 2,2.5 2.5)))"),
+            Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1 
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3.5 3.5,3.5 1.5,2.5 2.5)))"),
             Some("POINT (1 1)"),
             Some("GEOMETRYCOLLECTION(POINT(1 1),MULTIPOLYGON(((0 2,1 1,0 0,0 
2)),((2 0,1 1,2 2,2 0))))"),
             Some("POLYGON EMPTY"),
@@ -128,7 +128,7 @@ mod tests {
                 Some("LINESTRING(1 1,1 2)"),
                 Some("LINESTRING(0 0,0 1)"),
                 Some("LINESTRING(0 0,1 1)"),
-                Some("LINESTRING(2.5 2.5,3 2.5)"),
+                Some("LINESTRING(2.5 2.5,2 2.5)"),
                 Some("LINESTRING EMPTY"),
                 Some("LINESTRING(1 1,2 1)"),
                 Some("LINESTRING EMPTY"),
diff --git a/c/sedona-proj/src/lib.rs b/c/sedona-proj/src/lib.rs
index 7dc01963..914bb031 100644
--- a/c/sedona-proj/src/lib.rs
+++ b/c/sedona-proj/src/lib.rs
@@ -18,5 +18,6 @@ pub mod error;
 mod proj;
 mod proj_dyn_bindgen;
 pub mod register;
+pub mod sd_order_lnglat;
 mod st_transform;
 pub mod transform;
diff --git a/c/sedona-proj/src/sd_order_lnglat.rs 
b/c/sedona-proj/src/sd_order_lnglat.rs
new file mode 100644
index 00000000..7277bb6e
--- /dev/null
+++ b/c/sedona-proj/src/sd_order_lnglat.rs
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{fmt::Debug, sync::Arc};
+
+use arrow_array::builder::UInt64Builder;
+use arrow_schema::DataType;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use sedona_expr::scalar_udf::SedonaScalarKernel;
+use sedona_functions::executor::WkbBytesExecutor;
+use sedona_geometry::{transform::CrsEngine, wkb_header::WkbHeader};
+use sedona_schema::{crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::st_transform::with_global_proj_engine;
+
+/// Generic scalar kernel for `sd_order` that keys each geometry on its
+/// first coordinate, expressed as a longitude/latitude pair.
+///
+/// This [SedonaScalarKernel] is parameterized on the ordering function (e.g.,
+/// an S2, H3, or A5 cell identifier); the kernel itself extracts the first
+/// coordinate and, when the input CRS is not already lon/lat, projects it.
+/// The provided function receives `(lng, lat)` and must return a `u64`.
+pub struct OrderLngLat<F> {
+    order_fn: F, // callback mapping a `(lng, lat)` tuple to the sort key
+}
+
+impl<F: Fn((f64, f64)) -> u64> OrderLngLat<F> {
+    /// Create a kernel that computes its sort key by calling `order_fn` on a `(lng, lat)` pair
+    pub fn new(order_fn: F) -> Self {
+        Self { order_fn }
+    }
+}
+
+impl<F: Fn((f64, f64)) -> u64> Debug for OrderLngLat<F> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("OrderLngLat").finish() // F is not required to be Debug, so only the type name is printed
+    }
+}
+
+impl<F: Fn((f64, f64)) -> u64> SedonaScalarKernel for OrderLngLat<F> {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry_or_geography()], // a single geometry-or-geography argument
+            SedonaType::Arrow(DataType::UInt64), // the sort key is always a UInt64
+        );
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        // Extract the source CRS as a string unless it is already lon/lat, in
+        // which case no transformation is needed. A missing CRS also yields None,
+        // so the transform is skipped (although the resulting sort may not be effective).
+        let maybe_src_crs = match &arg_types[0] {
+            SedonaType::Wkb(_, maybe_crs) | SedonaType::WkbView(_, maybe_crs)
+                if maybe_crs != &lnglat() =>
+            {
+                maybe_crs.as_ref().map(|crs| crs.to_crs_string()) // None (no CRS) stays None
+            }
+            _ => None,
+        };
+
+        let executor = WkbBytesExecutor::new(arg_types, args);
+        let mut builder = 
UInt64Builder::with_capacity(executor.num_iterations());
+
+        // If we have a source CRS (i.e., the source CRS was present and not 
lon/lat already)
+        // resolve the transform and apply it to the first coord before 
applying the order_fn.
+        // Otherwise, skip the transform and go straight to the order_fn. This 
approach allows
+        // this to be used even if PROJ isn't available (as long as the data 
were lon/lat
+        // already).
+        if let Some(src_crs) = maybe_src_crs {
+            with_global_proj_engine(|engine| {
+                let to_lnglat = engine // the transform is resolved once per batch, not once per row
+                    .get_transform_crs_to_crs(&src_crs, "OGC:CRS84", None, "")
+                    .map_err(|e| DataFusionError::Execution(format!("{e}")))?;
+
+                executor.execute_wkb_void(|maybe_wkb| {
+                    match maybe_wkb {
+                        Some(wkb_bytes) => {
+                            let header = WkbHeader::try_new(wkb_bytes)
+                                .map_err(|e| 
DataFusionError::Execution(format!("{e}")))?;
+                            let mut first_xy = header.first_xy();
+                            to_lnglat
+                                .transform_coord(&mut first_xy)
+                                .map_err(|e| 
DataFusionError::Execution(format!("{e}")))?;
+                            let order = (self.order_fn)(first_xy);
+                            builder.append_value(order);
+                        }
+                        None => builder.append_null(), // null input row => null sort key
+                    }
+
+                    Ok(())
+                })?;
+
+                Ok(())
+            })?;
+        } else {
+            executor.execute_wkb_void(|maybe_wkb| {
+                match maybe_wkb {
+                    Some(wkb_bytes) => {
+                        let header = WkbHeader::try_new(wkb_bytes)
+                            .map_err(|e| 
DataFusionError::Execution(format!("{e}")))?;
+                        let first_xy = header.first_xy(); // presumably NaN coordinates for empty geometries -- TODO confirm
+                        let order = (self.order_fn)(first_xy);
+                        builder.append_value(order);
+                    }
+                    None => builder.append_null(), // null input row => null sort key
+                }
+
+                Ok(())
+            })?;
+        }
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use arrow_array::{create_array, ArrayRef};
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{Edges, WKB_GEOMETRY};
+    use sedona_testing::{create::create_array, testers::ScalarUdfTester};
+    use serde_json::Value;
+
+    use super::*;
+
+    #[test]
+    fn order_geometry() {
+        // For testing, sort by the first x value truncated to u64 (NaN maps to u64::MAX)
+        let kernel = OrderLngLat::new(|(lng, lat)| {
+            if lng.is_nan() || lat.is_nan() {
+                u64::MAX
+            } else {
+                lng as u64
+            }
+        });
+        let udf = SedonaScalarUDF::from_kernel("sd_order", Arc::new(kernel));
+
+        let array = create_array(
+            &[
+                // Approximately POINT (1 2), expressed in EPSG:3857
+                Some("POINT (111320 222685)"),
+                Some("POINT EMPTY"),
+                None,
+                // Approximately POINT (0 1), expressed in EPSG:3857
+                Some("POINT (0 111326)"),
+            ],
+            &WKB_GEOMETRY,
+        );
+
+        // Check the None Crs case (no transform: the raw x value is the key)
+        let tester = ScalarUdfTester::new(udf.clone().into(), 
vec![WKB_GEOMETRY]);
+        tester.assert_return_type(DataType::UInt64);
+
+        let result = tester.invoke_array(array.clone()).unwrap();
+        let expected =
+            create_array!(UInt64, [Some(111320), Some(u64::MAX), None, 
Some(0)]) as ArrayRef;
+        assert_eq!(&result, &expected);
+
+        // Check the "already lnglat" case (again no transform is applied)
+        let tester = ScalarUdfTester::new(
+            udf.clone().into(),
+            vec![SedonaType::Wkb(Edges::Planar, lnglat())],
+        );
+        tester.assert_return_type(DataType::UInt64);
+
+        let result = tester.invoke_array(array.clone()).unwrap();
+        let expected =
+            create_array!(UInt64, [Some(111320), Some(u64::MAX), None, 
Some(0)]) as ArrayRef;
+        assert_eq!(&result, &expected);
+
+        // Check the "not already lnglat" case (coordinates are projected to lon/lat first)
+        let crs =
+            
sedona_schema::crs::deserialize_crs(&Value::String("EPSG:3857".to_string())).unwrap();
+        let tester = ScalarUdfTester::new(
+            udf.clone().into(),
+            vec![SedonaType::Wkb(Edges::Planar, crs)],
+        );
+        tester.assert_return_type(DataType::UInt64);
+
+        let result = tester.invoke_array(array.clone()).unwrap();
+        let expected = create_array!(UInt64, [Some(1), Some(u64::MAX), None, 
Some(0)]) as ArrayRef;
+        assert_eq!(&result, &expected);
+    }
+}
diff --git a/c/sedona-proj/src/st_transform.rs 
b/c/sedona-proj/src/st_transform.rs
index 97f0781c..8e31dfb9 100644
--- a/c/sedona-proj/src/st_transform.rs
+++ b/c/sedona-proj/src/st_transform.rs
@@ -65,7 +65,7 @@ pub fn configure_global_proj_engine(builder: 
ProjCrsEngineBuilder) -> Result<()>
 
 /// Do something with the global thread-local PROJ engine, creating it if it 
has not
 /// already been created.
-fn with_global_proj_engine(
+pub(crate) fn with_global_proj_engine(
     mut func: impl FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<()>,
 ) -> Result<()> {
     PROJ_ENGINE.with(|engine_cell| {
@@ -293,7 +293,7 @@ fn invoke_scalar(wkb: &Wkb, trans: &dyn CrsTransform, 
builder: &mut BinaryBuilde
 fn parse_source_crs(source_type: &SedonaType) -> Result<Option<String>> {
     match source_type {
         SedonaType::Wkb(_, Some(crs)) | SedonaType::WkbView(_, Some(crs)) => {
-            crs.to_authority_code()
+            Ok(Some(crs.to_crs_string()))
         }
         _ => Ok(None),
     }
diff --git a/c/sedona-s2geography/src/geography_glue.cc 
b/c/sedona-s2geography/src/geography_glue.cc
index 9a5cf44a..13f489cd 100644
--- a/c/sedona-s2geography/src/geography_glue.cc
+++ b/c/sedona-s2geography/src/geography_glue.cc
@@ -69,6 +69,14 @@ double SedonaGeographyGlueTestLinkage(void) {
   return S2Earth::RadiusMeters() * s2_distance(index1, index2);
 }
 
+uint64_t SedonaGeographyGlueLngLatToCellId(double lng, double lat) {
+  if (std::isnan(lng) || std::isnan(lat)) {  // NaN input has no meaningful cell
+    return S2CellId::Sentinel().id();  // sentinel id marks "no cell"
+  } else {  // note the (lat, lng) argument order passed to S2LatLng::FromDegrees
+    return S2CellId(S2LatLng::FromDegrees(lat, 
lng).Normalized().ToPoint()).id();
+  }
+}
+
 struct UdfExporter {
   static void Export(std::unique_ptr<s2geography::arrow_udf::ArrowUDF> udf,
                      struct SedonaGeographyArrowUdf* out) {
diff --git a/c/sedona-s2geography/src/geography_glue.h 
b/c/sedona-s2geography/src/geography_glue.h
index 5028eb00..1d37ca32 100644
--- a/c/sedona-s2geography/src/geography_glue.h
+++ b/c/sedona-s2geography/src/geography_glue.h
@@ -43,6 +43,8 @@ const char* SedonaGeographyGlueAbseilVersion(void);
 
 double SedonaGeographyGlueTestLinkage(void);
 
+uint64_t SedonaGeographyGlueLngLatToCellId(double lng, double lat);
+
 struct SedonaGeographyArrowUdf {
   int (*init)(struct SedonaGeographyArrowUdf* self, struct ArrowSchema* 
arg_schema,
               const char* options, struct ArrowSchema* out);
diff --git a/c/sedona-s2geography/src/geography_glue_bindgen.rs 
b/c/sedona-s2geography/src/geography_glue_bindgen.rs
index 0865b881..45e09b7b 100644
--- a/c/sedona-s2geography/src/geography_glue_bindgen.rs
+++ b/c/sedona-s2geography/src/geography_glue_bindgen.rs
@@ -69,6 +69,7 @@ unsafe extern "C" {
     pub fn SedonaGeographyGlueS2GeometryVersion() -> *const c_char;
     pub fn SedonaGeographyGlueAbseilVersion() -> *const c_char;
     pub fn SedonaGeographyGlueTestLinkage() -> f64;
+    pub fn SedonaGeographyGlueLngLatToCellId(lng: f64, lat: f64) -> u64;
 
     declare_s2_c_udfs!(
         Area,
diff --git a/c/sedona-s2geography/src/s2geography.rs 
b/c/sedona-s2geography/src/s2geography.rs
index f02b6353..f2a82da2 100644
--- a/c/sedona-s2geography/src/s2geography.rs
+++ b/c/sedona-s2geography/src/s2geography.rs
@@ -24,6 +24,16 @@ use arrow_schema::{ArrowError, Fields, Schema};
 
 use crate::{error::S2GeographyError, geography_glue_bindgen::*};
 
+/// Compute an S2 Cell identifier from a `(longitude, latitude)` pair
+///
+/// If either longitude or latitude is NaN (e.g., an empty point),
+/// the sentinel cell (`u64::MAX`) is returned. Lon/Lat pairs are
+/// normalized such that invalid lon/lat pairs will still compute
+/// a result (even though that result may be difficult to interpret).
+pub fn s2_cell_id_from_lnglat(lnglat: (f64, f64)) -> u64 {
+    unsafe { SedonaGeographyGlueLngLatToCellId(lnglat.0, lnglat.1) } // SAFETY: FFI call taking two f64 by value; no pointers cross the boundary
+}
+
 /// Wrapper for scalar UDFs exposed by s2geography::arrow_udf
 ///
 /// Provides a minimal wrapper around the C callables that define
@@ -258,6 +268,22 @@ mod test {
 
     use super::*;
 
+    #[test]
+    fn test_s2_cell_id_from_lnglat() {
+        // Check a single, finite coordinate against its known cell id
+        assert_eq!(s2_cell_id_from_lnglat((0.0, 0.0)), 1152921504606846977);
+
+        // NaN in either (or both) positions should return the sentinel cell
+        assert_eq!(s2_cell_id_from_lnglat((f64::NAN, 0.0)), u64::MAX);
+        assert_eq!(s2_cell_id_from_lnglat((0.0, f64::NAN)), u64::MAX);
+        assert_eq!(s2_cell_id_from_lnglat((f64::NAN, f64::NAN)), u64::MAX);
+
+        // These should both return something (even if what it returns is 
difficult
+        // to interpret)
+        assert_ne!(s2_cell_id_from_lnglat((181.0, 0.0)), u64::MAX); // longitude beyond 180
+        assert_ne!(s2_cell_id_from_lnglat((0.0, 91.0)), u64::MAX); // latitude beyond 90
+    }
+
     #[test]
     fn scalar_udf() {
         let mut udf = S2ScalarUDF::Length();
diff --git a/python/sedonadb/tests/functions/test_order.py 
b/python/sedonadb/tests/functions/test_order.py
new file mode 100644
index 00000000..4e96d1a4
--- /dev/null
+++ b/python/sedonadb/tests/functions/test_order.py
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import geopandas
+import geopandas.testing
+
+
+def test_order_sql(con):  # ORDER BY sd_order(geometry) should give a stable, known row order
+    wkt_unsorted = [
+        None,
+        "POINT EMPTY",
+        "POINT (-80 -80)",
+        "POINT (80 80)",
+        "POINT (-79 -79)",
+    ]
+    wkt_sorted = [  # expected output order: nearby points adjacent, empty then null last
+        "POINT (80 80)",
+        "POINT (-80 -80)",
+        "POINT (-79 -79)",
+        "POINT EMPTY",
+        None,
+    ]
+    gdf_unsorted = geopandas.GeoDataFrame(
+        {"geometry": geopandas.GeoSeries.from_wkt(wkt_unsorted)}
+    )
+    gdf_sorted = geopandas.GeoDataFrame(
+        {"geometry": geopandas.GeoSeries.from_wkt(wkt_sorted)}
+    )
+
+    con.create_data_frame(gdf_unsorted).to_view("unsorted", overwrite=True)
+    hopefully_sorted = con.sql(
+        "SELECT * FROM unsorted ORDER BY sd_order(geometry)"
+    ).to_pandas()
+    geopandas.testing.assert_geodataframe_equal(hopefully_sorted, gdf_sorted)  # row order is part of the comparison
diff --git a/rust/sedona-functions/src/executor.rs 
b/rust/sedona-functions/src/executor.rs
index 0a98e71d..0686070f 100644
--- a/rust/sedona-functions/src/executor.rs
+++ b/rust/sedona-functions/src/executor.rs
@@ -267,8 +267,7 @@ impl GeometryFactory for WkbBytesFactory {
 /// This [GenericExecutor] implementation provides more optimization 
opportunities,
 /// but it requires additional manual processing of the raw [Wkb] bytes 
compared to
 /// the [WkbExecutor].
-pub(crate) type WkbBytesExecutor<'a, 'b> =
-    GenericExecutor<'a, 'b, WkbBytesFactory, WkbBytesFactory>;
+pub type WkbBytesExecutor<'a, 'b> = GenericExecutor<'a, 'b, WkbBytesFactory, 
WkbBytesFactory>;
 
 /// Trait for iterating over a container type as geometry scalars
 ///
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index a6003043..a7488716 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -22,6 +22,7 @@ mod predicates;
 mod referencing;
 pub mod register;
 mod sd_format;
+pub mod sd_order;
 pub mod st_analyze_agg;
 mod st_area;
 mod st_asbinary;
diff --git a/rust/sedona-functions/src/register.rs 
b/rust/sedona-functions/src/register.rs
index cdae47b8..2b9ecb39 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -61,6 +61,7 @@ pub fn default_function_set() -> FunctionSet {
         crate::referencing::st_line_interpolate_point_udf,
         crate::referencing::st_line_locate_point_udf,
         crate::sd_format::sd_format_udf,
+        crate::sd_order::sd_order_udf,
         crate::st_area::st_area_udf,
         crate::st_asbinary::st_asbinary_udf,
         crate::st_astext::st_astext_udf,
diff --git a/rust/sedona-functions/src/sd_order.rs 
b/rust/sedona-functions/src/sd_order.rs
new file mode 100644
index 00000000..8419b57a
--- /dev/null
+++ b/rust/sedona-functions/src/sd_order.rs
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::Result;
+use datafusion_expr::{
+    scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, 
Volatility,
+};
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_schema::datatypes::SedonaType;
+use std::{fmt::Debug, sync::Arc};
+
+/// SD_Order() scalar UDF implementation
+///
+/// This function is invoked to obtain a proxy array whose order may be used
+/// to sort based on the value. The default kernel returns its input unchanged;
+/// a utility is provided elsewhere to order geometry and/or geographies based
+/// on the first coordinate. More sophisticated sorting (e.g., XZ2) may be added
+/// in the future.
+pub fn sd_order_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "sd_order",
+        vec![Arc::new(SDOrderDefault {})], // only the pass-through kernel is registered here
+        Volatility::Immutable,
+        Some(sd_order_doc()),
+    )
+}
+
+/// Build the user-facing documentation attached to the `sd_order` UDF.
+///
+/// The SQL example passes exactly one argument because the kernel's
+/// `return_type()` does not match calls with any other arity.
+fn sd_order_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Return an arbitrary value that may be used to sort the input.",
+        "SD_Order (value: Any)",
+    )
+    .with_argument("value", "Any: An arbitrary value")
+    .with_sql_example("SELECT SD_Order(ST_Point(1, 2))")
+    .build()
+}
+
+/// Default kernel that returns its input unchanged (i.e., by default, sorting
+/// falls back to whatever DataFusion would have done with the value itself)
+#[derive(Debug)]
+struct SDOrderDefault {}
+
+impl SedonaScalarKernel for SDOrderDefault {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        if args.len() != 1 {
+            return Ok(None); // this kernel only matches a single-argument call
+        }
+
+        Ok(Some(args[0].clone())) // the output type mirrors the input type
+    }
+
+    fn invoke_batch(
+        &self,
+        _arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        Ok(args[0].clone()) // pass the value through untouched
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{create_array, ArrayRef};
+    use arrow_schema::DataType;
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::SedonaType;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = sd_order_udf().into();
+        assert_eq!(udf.name(), "sd_order");
+        assert!(udf.documentation().is_some())
+    }
+
+    #[rstest]
+    fn order_not_geometry(
+        #[values(
+            SedonaType::Arrow(DataType::Utf8),
+            SedonaType::Arrow(DataType::LargeUtf8)
+        )]
+        sedona_type: SedonaType,
+    ) {
+        let udf = sd_order_udf();
+        let tester = ScalarUdfTester::new(udf.clone().into(), 
vec![sedona_type.clone()]);
+        tester.assert_return_type(sedona_type.clone()); // the default kernel echoes the input type
+
+        tester.assert_scalar_result_equals("foofy", "foofy"); // scalar values pass through unchanged
+        tester.assert_scalar_result_equals(ScalarValue::Null, 
ScalarValue::Null);
+
+        let array: ArrayRef = create_array!(Utf8, [Some("foofy"), None, 
Some("other foofy")]);
+        let array_casted = ColumnarValue::Array(array) // cast so the storage matches the type under test
+            .cast_to(sedona_type.storage_type(), None)
+            .unwrap()
+            .to_array(3)
+            .unwrap();
+        let result = tester.invoke_array(array_casted.clone()).unwrap();
+        assert_eq!(&array_casted, &result); // arrays pass through unchanged too
+    }
+}
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index 72cd727e..3d73e354 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -97,10 +97,37 @@ impl PartialEq<dyn CoordinateReferenceSystem + Send + Sync>
 /// A trait defining the minimum required properties of a concrete coordinate
 /// reference system, allowing the details of this to be implemented elsewhere.
 pub trait CoordinateReferenceSystem: Debug {
+    /// Compute the representation of this Crs in the form required for JSON 
output
+    ///
+    /// The output must be valid JSON (e.g., arbitrary strings must be quoted).
     fn to_json(&self) -> String;
+
+    /// Compute the representation of this Crs as a string in the form 
Authority:Code
+    ///
+    /// If there is no such representation, returns None.
     fn to_authority_code(&self) -> Result<Option<String>>;
+
+    /// Compute CRS equality
+    ///
+    /// CRS equality is a relatively thorny topic and can be difficult to 
compute;
+    /// however, this method should try to compare self and other on value 
(e.g.,
+    /// comparing authority_code where possible).
     fn crs_equals(&self, other: &dyn CoordinateReferenceSystem) -> bool;
+
+    /// Convert this CRS representation to an integer SRID if possible.
+    ///
+    /// For the purposes of this trait, an SRID is always equivalent to the
+    /// authority_code `"EPSG:{srid}"`. Note that other SRID representations
+    /// (e.g., GeoArrow, Parquet GEOMETRY/GEOGRAPHY) do not make any guarantees
+    /// that an SRID comes from the EPSG authority.
     fn srid(&self) -> Result<Option<u32>>;
+
+    /// Compute a CRS string representation
+    ///
+    /// Unlike `to_json()`, arbitrary string values returned by this method 
should
+    /// not be escaped. This is the representation expected as input to PROJ, 
GDAL,
+    /// and Parquet GEOMETRY/GEOGRAPHY representations of CRS.
+    fn to_crs_string(&self) -> String;
 }
 
 /// Concrete implementation of a default longitude/latitude coordinate 
reference system
@@ -235,6 +262,10 @@ impl CoordinateReferenceSystem for AuthorityCode {
             Ok(None)
         }
     }
+
+    fn to_crs_string(&self) -> String {
+        format!("{}:{}", self.authority, self.code) // unquoted "authority:code", e.g. "EPSG:4269"
+    }
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -313,6 +344,10 @@ impl CoordinateReferenceSystem for ProjJSON {
 
         Ok(None)
     }
+
+    fn to_crs_string(&self) -> String {
+        self.to_json() // for PROJJSON the string form is the same text as the JSON form
+    }
 }
 
 pub const OGC_CRS84_PROJJSON: &str = 
r#"{"$schema":"https://proj.org/schemas/v0.7/projjson.schema.json","type":"GeographicCRS","name":"WGS
 84 (CRS84)","datum_ensemble":{"name":"World Geodetic System 1984 
ensemble","members":[{"name":"World Geodetic System 1984 
(Transit)","id":{"authority":"EPSG","code":1166}},{"name":"World Geodetic 
System 1984 (G730)","id":{"authority":"EPSG","code":1152}},{"name":"World 
Geodetic System 1984 
(G873)","id":{"authority":"EPSG","code":1153}},{"name":"World  [...]
@@ -347,6 +382,7 @@ mod test {
         let projjson = OGC_CRS84_PROJJSON.parse::<ProjJSON>().unwrap();
         assert_eq!(projjson.to_authority_code().unwrap().unwrap(), 
"OGC:CRS84");
         assert_eq!(projjson.srid().unwrap(), Some(4326));
+        assert_eq!(projjson.to_json(), projjson.to_crs_string());
 
         let json_value: Value = 
serde_json::from_str(OGC_CRS84_PROJJSON).unwrap();
         let json_value_roundtrip: Value = 
serde_json::from_str(&projjson.to_json()).unwrap();
@@ -376,6 +412,7 @@ mod test {
         assert!(auth_code.crs_equals(&auth_code));
         assert!(!auth_code.crs_equals(LngLat::crs().unwrap().as_ref()));
         assert_eq!(auth_code.srid().unwrap(), Some(4269));
+        assert_eq!(auth_code.to_crs_string(), "EPSG:4269");
 
         assert_eq!(
             auth_code.to_authority_code().unwrap(),
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 7723c20f..82947460 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -151,7 +151,7 @@ impl SedonaContext {
 
         // Register s2geography scalar kernels if built with s2geography 
support
         #[cfg(feature = "s2geography")]
-        
out.register_scalar_kernels(sedona_s2geography::register::scalar_kernels().into_iter())?;
+        out.register_s2geography()?;
 
         // Always register proj scalar kernels (although actually calling them 
will error
         // without this feature unless 
sedona_proj::register::configure_global_proj_engine()
@@ -164,6 +164,22 @@ impl SedonaContext {
         Ok(out)
     }
 
+    #[cfg(feature = "s2geography")]
+    fn register_s2geography(&mut self) -> Result<()> { // s2geography kernels plus an S2-cell-based sd_order kernel
+        use sedona_proj::sd_order_lnglat;
+
+        
self.register_scalar_kernels(sedona_s2geography::register::scalar_kernels().into_iter())?;
+
+        let sd_order_kernel = sd_order_lnglat::OrderLngLat::new( // sort key = S2 cell id of the first coordinate
+            sedona_s2geography::s2geography::s2_cell_id_from_lnglat,
+        );
+        self.register_scalar_kernels(
+            [("sd_order", Arc::new(sd_order_kernel) as 
ScalarKernelRef)].into_iter(),
+        )?;
+
+        Ok(())
+    }
+
     /// Register all functions in a [FunctionSet] with this context
     pub fn register_function_set(&mut self, function_set: FunctionSet) {
         for udf in function_set.scalar_udfs() {

Reply via email to