This is an automated email from the ASF dual-hosted git repository. prantogg pushed a commit to branch pranav/add-st_asgeojson in repository https://gitbox.apache.org/repos/asf/sedona-db.git
commit c1672801e91ac814d81833f9dc9b72d76a1086f4 Author: Pranav Toggi <[email protected]> AuthorDate: Sun Dec 28 10:18:02 2025 -0800 feat: implement ST_AsGeoJSON --- Cargo.toml | 1 + rust/sedona-functions/Cargo.toml | 2 + rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_asgeojson.rs | 465 ++++++++++++++++++++++++++++++ 5 files changed, 470 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 9780fbe1..a84e149c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,6 +105,7 @@ geos = { git="https://github.com/georust/geos.git", rev="47afbad2483e489911ddb45 geo-types = "0.7.17" geo-traits = "0.3.0" geo = "0.31.0" +geojson = "0.24.2" geo-index = { version = "0.3.2", features = ["use-geo_0_31"] } diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 9fb7f35f..72972c9e 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -45,6 +45,8 @@ arrow-buffer = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } geo-traits = { workspace = true } +geo-types = { workspace = true } +geojson = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 44c8ad02..fde75073 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -27,6 +27,7 @@ pub mod st_analyze_agg; mod st_area; mod st_asbinary; mod st_astext; +mod st_asgeojson; mod st_azimuth; mod st_buffer; mod st_centroid; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index ff439578..9fce9f5f 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -64,6 +64,7 @@ pub fn default_function_set() -> FunctionSet { crate::sd_order::sd_order_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, + crate::st_asgeojson::st_asgeojson_udf, crate::st_astext::st_astext_udf, crate::st_azimuth::st_azimuth_udf, crate::st_buffer::st_buffer_udf, diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs new file mode 100644 index 00000000..6c36fa98 --- /dev/null +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -0,0 +1,465 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::sync::Arc; + +use crate::executor::WkbExecutor; +use arrow_array::builder::StringBuilder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use geo_traits::to_geo::{ + ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, ToGeoPoint, + ToGeoPolygon, +}; +use geo_traits::{GeometryCollectionTrait, GeometryTrait, GeometryType}; +use geo_types::Geometry; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// Output format type for GeoJSON +#[derive(Debug, Clone, Copy, PartialEq)] +enum GeoJsonType { + Simple, + Feature, + FeatureCollection, +} + +impl GeoJsonType { + fn from_str(s: &str) -> Result<Self> { + match s.to_lowercase().as_str() { + "simple" => Ok(GeoJsonType::Simple), + "feature" => Ok(GeoJsonType::Feature), + "featurecollection" => Ok(GeoJsonType::FeatureCollection), + _ => Err(DataFusionError::Execution(format!( + "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", + s + ))), + } + } +} + +/// ST_AsGeoJSON() scalar UDF implementation +/// +/// An implementation of GeoJSON writing using the geojson crate. +pub fn st_asgeojson_udf() -> SedonaScalarUDF { + let udf = SedonaScalarUDF::new( + "st_asgeojson", + vec![ + Arc::new(STAsGeoJSON {}), + Arc::new(STAsGeoJSONWithType {}), + ], + Volatility::Immutable, + Some(st_asgeojson_doc()), + ); + udf +} + +fn st_asgeojson_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Return the GeoJSON representation of a geometry or geography", + "ST_AsGeoJSON (A: Geometry [, type: String])", + ) + .with_argument("geom", "geometry: Input geometry or geography") + .with_argument("type", "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'Feature')") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'FeatureCollection')") + .with_related_udf("ST_GeomFromGeoJSON") + .build() +} + +#[derive(Debug)] +struct STAsGeoJSON {} + +impl SedonaScalarKernel for STAsGeoJSON { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + convert_to_geojson(arg_types, args, GeoJsonType::Simple) + } +} + +#[derive(Debug)] +struct STAsGeoJSONWithType {} + +impl SedonaScalarKernel for STAsGeoJSONWithType { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_geometry_or_geography(), + ArgMatcher::is_string(), + ], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + // Extract the type parameter + let geojson_type = match &args[1] { + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { + GeoJsonType::from_str(type_str.as_str())? + } + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { + GeoJsonType::Simple // Default to Simple if NULL + } + _ => { + return Err(DataFusionError::Execution( + "Second argument to ST_AsGeoJSON must be a string literal".to_string(), + )); + } + }; + + convert_to_geojson(&arg_types[..1], &args[..1], geojson_type) + } +} + +fn convert_to_geojson( + arg_types: &[SedonaType], + args: &[ColumnarValue], + geojson_type: GeoJsonType, +) -> Result<ColumnarValue> { + let executor = WkbExecutor::new(arg_types, args); + + // Estimate the minimum probable memory requirement of the output. + // GeoJSON is typically longer than WKT due to JSON formatting. + // Feature and FeatureCollection add extra wrapping + let base_size = match geojson_type { + GeoJsonType::Simple => 50, + GeoJsonType::Feature => 100, + GeoJsonType::FeatureCollection => 150, + }; + let min_probable_geojson_size = executor.num_iterations() * base_size; + + // Initialize an output builder of the appropriate type + let mut builder = + StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + // Convert WKB geometry to geo_types::Geometry using geo-traits + let geo_geometry = wkb_to_geometry(item)?; + + match geo_geometry { + Some(geom) => { + // Convert geo_types::Geometry to geojson::Geometry + let geojson_geom: geojson::Geometry = (&geom).try_into() + .map_err(|e| DataFusionError::Execution(format!("Failed to convert to GeoJSON: {:?}", e)))?; + + // Wrap the geometry based on the type parameter and serialize + let geojson_output = match geojson_type { + GeoJsonType::Simple => { + geojson_geom + } + GeoJsonType::Feature => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + return Ok(()); + } + GeoJsonType::FeatureCollection => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let feature_collection = geojson::FeatureCollection { + bbox: None, + features: vec![feature], + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature_collection) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + return Ok(()); + } + }; + + // For Simple type, serialize the geometry + let json_str = serde_json::to_string(&geojson_output) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + } + None => { + return Err(DataFusionError::NotImplemented( + "Unsupported geometry type for GeoJSON conversion".to_string(), + )); + } + } + } + None => builder.append_null(), + }; + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) +} + +/// Convert a WKB geometry to geo_types::Geometry, including GeometryCollection support +fn wkb_to_geometry(item: impl GeometryTrait<T = f64>) -> Result<Option<Geometry>> { + let geo_geometry = match item.as_type() { + GeometryType::Point(geom) => geom.try_to_point().map(Geometry::Point), + GeometryType::LineString(geom) => Some(Geometry::LineString(geom.to_line_string())), + GeometryType::Polygon(geom) => Some(Geometry::Polygon(geom.to_polygon())), + GeometryType::MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), + GeometryType::MultiLineString(geom) => { + Some(Geometry::MultiLineString(geom.to_multi_line_string())) + } + GeometryType::MultiPolygon(geom) => { + Some(Geometry::MultiPolygon(geom.to_multi_polygon())) + } + GeometryType::GeometryCollection(geom) => convert_geometry_collection(geom)?, + _ => None, + }; + Ok(geo_geometry) +} + +/// Convert a GeometryCollection to geo_types::Geometry +/// Handles up to 1 level of nesting to avoid compiler issues with recursion +fn convert_geometry_collection<GC: GeometryCollectionTrait<T = f64>>( + geom: &GC, +) -> Result<Option<Geometry>> { + let geometries: Result<Vec<_>> = geom + .geometries() + .map(|child| { + let child_geom = match child.as_type() { + GeometryType::Point(g) => g.try_to_point().map(Geometry::Point), + GeometryType::LineString(g) => Some(Geometry::LineString(g.to_line_string())), + GeometryType::Polygon(g) => Some(Geometry::Polygon(g.to_polygon())), + GeometryType::MultiPoint(g) => g.try_to_multi_point().map(Geometry::MultiPoint), + GeometryType::MultiLineString(g) => { + Some(Geometry::MultiLineString(g.to_multi_line_string())) + } + GeometryType::MultiPolygon(g) => { + Some(Geometry::MultiPolygon(g.to_multi_polygon())) + } + GeometryType::GeometryCollection(g) => { + // Support one level of nested GeometryCollection + return convert_geometry_collection(g)? + .ok_or_else(|| { + DataFusionError::NotImplemented( + "Nested GeometryCollection with unsupported types".to_string(), + ) + }); + } + _ => None, + }; + + child_geom.ok_or_else(|| { + DataFusionError::NotImplemented( + "GeometryCollection contains unsupported geometry types".to_string(), + ) + }) + }) + .collect(); + + let geometries = geometries?; + + Ok(Some(Geometry::GeometryCollection( + geo_types::GeometryCollection(geometries), + ))) +} + +#[cfg(test)] +mod tests { + use datafusion_common::scalar::ScalarValue; + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_schema::datatypes::{ + WKB_GEOGRAPHY, WKB_GEOMETRY, WKB_VIEW_GEOGRAPHY, + }; + use sedona_testing::{compare::assert_scalar_equal, testers::ScalarUdfTester}; + + use super::*; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = st_asgeojson_udf().into(); + assert_eq!(udf.name(), "st_asgeojson"); + assert!(udf.documentation().is_some()) + } + + #[rstest] + fn udf( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + // Test with a simple point + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + // Verify it's valid JSON and contains expected structure + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Point"); + assert!(parsed["coordinates"].is_array()); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test with null + assert_scalar_equal( + &tester.invoke_wkb_scalar(None).unwrap(), + &ScalarValue::Utf8(None), + ); + + // Test with array + let result_array = tester + .invoke_wkb_array(vec![Some("POINT(1 2)"), None, Some("POINT(3 5)")]) + .unwrap(); + + // Verify the array has the expected number of elements + assert_eq!((*result_array).len(), 3); + } + + #[rstest] + fn geometry_collection( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + // Test with a simple GeometryCollection + let result = tester + .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))")) + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + // Verify it's valid JSON and contains expected structure + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "GeometryCollection"); + assert!(parsed["geometries"].is_array()); + let geometries = parsed["geometries"].as_array().unwrap(); + assert_eq!(geometries.len(), 2); + assert_eq!(geometries[0]["type"], "Point"); + assert_eq!(geometries[1]["type"], "LineString"); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test with empty GeometryCollection + let result = tester + .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY")) + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "GeometryCollection"); + let geometries = parsed["geometries"].as_array().unwrap(); + assert_eq!(geometries.len(), 0); + } else { + panic!("Expected Utf8 scalar value"); + } + } + + #[rstest] + fn geojson_type_parameter( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + // Test Simple type (default) + let tester = ScalarUdfTester::new(st_asgeojson_udf().into(), vec![sedona_type.clone()]); + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Point"); + assert!(parsed.get("geometry").is_none()); // No wrapping + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test Feature type + let tester_with_type = ScalarUdfTester::new( + st_asgeojson_udf().into(), + vec![sedona_type, SedonaType::Arrow(DataType::Utf8)], + ); + let result = tester_with_type + .invoke_scalar_scalar("POINT (1 2)", "Feature") + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Feature"); + assert!(parsed["geometry"].is_object()); + assert_eq!(parsed["geometry"]["type"], "Point"); + assert!(parsed["geometry"]["coordinates"].is_array()); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test FeatureCollection type + let result = tester_with_type + .invoke_scalar_scalar("POINT (1 2)", "FeatureCollection") + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "FeatureCollection"); + assert!(parsed["features"].is_array()); + let features = parsed["features"].as_array().unwrap(); + assert_eq!(features.len(), 1); + assert_eq!(features[0]["type"], "Feature"); + assert_eq!(features[0]["geometry"]["type"], "Point"); + } else { + panic!("Expected Utf8 scalar value"); + } + } + + #[test] + fn invalid_geojson_type() { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], + ); + + // Test with invalid type + let result = tester.invoke_scalar_scalar("POINT (1 2)", "InvalidType"); + + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!(err_msg.contains("Invalid GeoJSON type")); + } +}
