paleolimbot commented on code in PR #381: URL: https://github.com/apache/sedona-db/pull/381#discussion_r2570406153
########## rust/sedona-functions/src/st_interiorringn.rs: ########## @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::builder::BinaryBuilder; +use datafusion_common::cast::as_int64_array; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation}; +use geo_traits::to_geo::ToGeoLineString; +use geo_traits::{GeometryTrait, PolygonTrait}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::{write_wkb_linestring, WKB_MIN_PROBABLE_BYTES}; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; +use wkb::reader::Wkb; + +use crate::executor::WkbExecutor; + +/// ST_InteriorRingN() scalar UDF +/// +/// Native implementation to get the nth interior ring (hole) of a Polygon +pub fn st_interiorringn_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_interiorringn", + vec![Arc::new(STInteriorRingN)], + datafusion_expr::Volatility::Immutable, + Some(st_interiorringn_doc()), + ) +} + +fn st_interiorringn_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the 
Nth interior ring (hole) of a POLYGON geometry as a LINESTRING. \ + The index starts at 1. Returns NULL if the geometry is not a polygon or the index is out of range.", + "ST_GeometryN (geom: Geometry, n: integer)") + .with_argument("geom", "geometry: Input Polygon") + .with_argument("n", "n: Index") + .with_sql_example("SELECT ST_InteriorRingN('POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))', 1)") + .build() +} + +#[derive(Debug)] +struct STInteriorRingN; + +impl SedonaScalarKernel for STInteriorRingN { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry(), ArgMatcher::is_integer()], + WKB_GEOMETRY, + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[datafusion_expr::ColumnarValue], + ) -> Result<datafusion_expr::ColumnarValue> { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let integer_value = args[1] + .cast_to(&arrow_schema::DataType::Int64, None)? + .to_array(executor.num_iterations())?; + let index_array = as_int64_array(&integer_value)?; + let mut index_iter = index_array.iter(); + + executor.execute_wkb_void(|maybe_wkb| { + match (maybe_wkb, index_iter.next().unwrap()) { + (Some(wkb), Some(index)) => { + if invoke_scalar(&wkb, (index - 1) as usize, &mut builder)? 
{ + builder.append_value([]); + } else { + // Unsupported Geometry Type, Invalid index encountered + builder.append_null(); + } + } + _ => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar(geom: &Wkb, index: usize, writer: &mut impl std::io::Write) -> Result<bool> { + let geometry = match geom.as_type() { + geo_traits::GeometryType::Polygon(pgn) => pgn.interior(index), + _ => None, + }; + + if let Some(buf) = geometry { + write_wkb_linestring( + writer, + buf.to_line_string() + .coords() + .map(|c| c.x_y()) + .collect::<Vec<(f64, f64)>>() + .into_iter(), + ) + .map_err(|e| DataFusionError::Execution(format!("{e}")))?; + Ok(true) + } else { + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + use sedona_schema::datatypes::WKB_VIEW_GEOMETRY; + use sedona_testing::{ + compare::assert_array_equal, create::create_array, testers::ScalarUdfTester, + }; + + use super::*; + + #[rstest] + fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let tester = ScalarUdfTester::new( + st_interiorringn_udf().into(), + vec![ + sedona_type.clone(), + SedonaType::Arrow(arrow_schema::DataType::Int64), + ], + ); + tester.assert_return_type(WKB_GEOMETRY); + + let input_wkt = create_array( + &[ + // I. Null/Empty/Non-Polygon Inputs Review Comment: Thank you for all of these! I think the missing cases here are Z, M, and ZM. It may be helpful to break these up into a few related input_wkt/inters/expected triplets to keep the whole test case roughly on your screen as you're scrolling. ########## rust/sedona-functions/src/st_interiorringn.rs: ########## @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::builder::BinaryBuilder; +use datafusion_common::cast::as_int64_array; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation}; +use geo_traits::to_geo::ToGeoLineString; +use geo_traits::{GeometryTrait, PolygonTrait}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::wkb_factory::{write_wkb_linestring, WKB_MIN_PROBABLE_BYTES}; +use sedona_schema::datatypes::SedonaType; +use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; +use wkb::reader::Wkb; + +use crate::executor::WkbExecutor; + +/// ST_InteriorRingN() scalar UDF +/// +/// Native implementation to get the nth interior ring (hole) of a Polygon +pub fn st_interiorringn_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_interiorringn", + vec![Arc::new(STInteriorRingN)], + datafusion_expr::Volatility::Immutable, + Some(st_interiorringn_doc()), + ) +} + +fn st_interiorringn_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the Nth interior ring (hole) of a POLYGON geometry as a LINESTRING. \ + The index starts at 1. 
Returns NULL if the geometry is not a polygon or the index is out of range.", + "ST_GeometryN (geom: Geometry, n: integer)") + .with_argument("geom", "geometry: Input Polygon") + .with_argument("n", "n: Index") + .with_sql_example("SELECT ST_InteriorRingN('POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))', 1)") + .build() +} + +#[derive(Debug)] +struct STInteriorRingN; + +impl SedonaScalarKernel for STInteriorRingN { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry(), ArgMatcher::is_integer()], + WKB_GEOMETRY, + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[datafusion_expr::ColumnarValue], + ) -> Result<datafusion_expr::ColumnarValue> { + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + + let integer_value = args[1] + .cast_to(&arrow_schema::DataType::Int64, None)? + .to_array(executor.num_iterations())?; + let index_array = as_int64_array(&integer_value)?; + let mut index_iter = index_array.iter(); + + executor.execute_wkb_void(|maybe_wkb| { + match (maybe_wkb, index_iter.next().unwrap()) { + (Some(wkb), Some(index)) => { + if invoke_scalar(&wkb, (index - 1) as usize, &mut builder)? 
{ + builder.append_value([]); + } else { + // Unsupported Geometry Type, Invalid index encountered + builder.append_null(); + } + } + _ => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar(geom: &Wkb, index: usize, writer: &mut impl std::io::Write) -> Result<bool> { + let geometry = match geom.as_type() { + geo_traits::GeometryType::Polygon(pgn) => pgn.interior(index), + _ => None, + }; + + if let Some(buf) = geometry { Review Comment: I think the writing of the output should be more like the output writing you added for ST_Reverse: https://github.com/apache/sedona-db/blob/16d232be0f28441b85801936b270fa43f7edceb4/rust/sedona-functions/src/st_reverse.rs#L189-L190 ...in particular, I am not sure that `to_line_string()` and `x_y()` are what we want here because we need to support Z, M, and ZM coordinate types. Also, `wkb` might be a better name than `buf` here since I don't think buf is actually a buffer here. ########## rust/sedona-testing/src/benchmark_util.rs: ########## @@ -277,6 +277,8 @@ pub enum BenchmarkArgSpec { /// Randomly generated linestring input with a specified number of vertices MultiPoint(usize), /// Randomly generated floating point input with a given range of values + Int64(i64, i64), + /// Randomly generated floating point input with a given range of values Review Comment: ```suggestion /// Randomly generated integer input with a given range of values ``` ########## rust/sedona-functions/benches/native-functions.rs: ########## @@ -82,6 +82,21 @@ fn criterion_benchmark(c: &mut Criterion) { benchmark::scalar(c, &f, "native", "st_hasm", Point); benchmark::scalar(c, &f, "native", "st_hasm", LineString(10)); + benchmark::scalar( + c, + &f, + "native", + "st_interiorringn", + BenchmarkArgs::ArrayArray(Polygon(10), Int64(1, 10)), + ); Review Comment: I'm not sure this is a very representative benchmark because it should return NULL for every input (`Polygon(10)` doesn't have a hole). 
It may be less confusing to omit it here (rather than include a version that isn't measuring what it appears to be measuring). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
