petern48 commented on code in PR #469: URL: https://github.com/apache/sedona-db/pull/469#discussion_r2654125060
########## rust/sedona-functions/src/st_asgeojson.rs: ########## @@ -0,0 +1,456 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::sync::Arc; + +use crate::executor::WkbExecutor; +use arrow_array::builder::StringBuilder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use geo_traits::to_geo::{ + ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, ToGeoPoint, + ToGeoPolygon, +}; +use geo_traits::{GeometryCollectionTrait, GeometryTrait, GeometryType}; +use geo_types::Geometry; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// Output format type for GeoJSON +#[derive(Debug, Clone, Copy, PartialEq)] +enum GeoJsonType { + Simple, + Feature, + FeatureCollection, +} + +impl GeoJsonType { + fn from_str(s: &str) -> Result<Self> { + match s.to_lowercase().as_str() { + "simple" => Ok(GeoJsonType::Simple), + "feature" => Ok(GeoJsonType::Feature), + "featurecollection" => Ok(GeoJsonType::FeatureCollection), + _ => Err(DataFusionError::Execution(format!( + "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", + s + ))), + } + } +} + +/// ST_AsGeoJSON() scalar UDF implementation +/// +/// An implementation of GeoJSON writing using the geojson crate. +pub fn st_asgeojson_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_asgeojson", + vec![Arc::new(STAsGeoJSON {}), Arc::new(STAsGeoJSONWithType {})], + Volatility::Immutable, + Some(st_asgeojson_doc()), + ) +} + +fn st_asgeojson_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Return the GeoJSON representation of a geometry or geography", + "ST_AsGeoJSON (A: Geometry [, type: String])", + ) + .with_argument("geom", "geometry: Input geometry or geography") + .with_argument( + "type", + "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'", + ) + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'Feature')") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'FeatureCollection')") + .with_related_udf("ST_GeomFromGeoJSON") + .build() +} + +#[derive(Debug)] +struct STAsGeoJSON {} + +impl SedonaScalarKernel for STAsGeoJSON { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + convert_to_geojson(arg_types, args, GeoJsonType::Simple) + } +} + +#[derive(Debug)] +struct STAsGeoJSONWithType {} + +impl SedonaScalarKernel for STAsGeoJSONWithType { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_geometry_or_geography(), + ArgMatcher::is_string(), + ], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + // Extract the type parameter + let geojson_type = match &args[1] { + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { + GeoJsonType::from_str(type_str.as_str())? + } + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { + GeoJsonType::Simple // Default to Simple if NULL + } + _ => { + return Err(DataFusionError::Execution( + "Second argument to ST_AsGeoJSON must be a string literal".to_string(), + )); + } + }; + + convert_to_geojson(&arg_types[..1], &args[..1], geojson_type) + } +} + +fn convert_to_geojson( + arg_types: &[SedonaType], + args: &[ColumnarValue], + geojson_type: GeoJsonType, +) -> Result<ColumnarValue> { + let executor = WkbExecutor::new(arg_types, args); + + // Estimate the minimum probable memory requirement of the output. + // GeoJSON is typically longer than WKT due to JSON formatting. + // Feature and FeatureCollection add extra wrapping + let base_size = match geojson_type { + GeoJsonType::Simple => 50, + GeoJsonType::Feature => 100, + GeoJsonType::FeatureCollection => 150, + }; + let min_probable_geojson_size = executor.num_iterations() * base_size; + + // Initialize an output builder of the appropriate type + let mut builder = + StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + // Convert WKB geometry to geo_types::Geometry using geo-traits + let geo_geometry = wkb_to_geometry(item)?; + + match geo_geometry { + Some(geom) => { + // Convert geo_types::Geometry to geojson::Geometry + let geojson_geom: geojson::Geometry = (&geom).try_into().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to convert to GeoJSON: {:?}", + e + )) + })?; Review Comment: filed a ticket so we don't forget: https://github.com/apache/sedona-db/issues/472 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
