petern48 commented on code in PR #369: URL: https://github.com/apache/sedona-db/pull/369#discussion_r2569723010
########## c/sedona-geos/src/st_numinteriorrings.rs: ########## @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use crate::executor::GeosExecutor; +use arrow_array::builder::Int32Builder; +use arrow_schema::DataType; +use datafusion_common::{error::Result, DataFusionError}; +use datafusion_expr::ColumnarValue; +use geos::{Geom, Geometry, GeometryTypes}; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +pub fn st_num_interior_rings_impl() -> ScalarKernelRef { + Arc::new(STNumInteriorRings {}) +} + +#[derive(Debug)] +struct STNumInteriorRings {} + +impl SedonaScalarKernel for STNumInteriorRings { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Int32), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let executor = GeosExecutor::new(arg_types, args); + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + 
executor.execute_wkb_void(|maybe_geom| { + match maybe_geom { + None => builder.append_null(), + Some(geom) => { + let res = invoke_scalar(&geom)?; + match res { + Some(n) => builder.append_value(n), + // non-polygon / unsupported -> NULL (matches PostGIS + py tests) Review Comment: ```suggestion ``` ########## python/sedonadb/tests/functions/test_functions.py: ########## @@ -2698,3 +2698,45 @@ def test_st_snap(eng, input, reference, tolerance, expected): def test_st_zmflag(eng, geom, expected): eng = eng.create_or_skip() eng.assert_query_result(f"SELECT ST_ZmFlag({geom_or_null(geom)})", expected) + + [email protected]("eng", [SedonaDB, PostGIS]) [email protected]( + ("geom", "expected"), + [ + (None, None), + ("POINT (1 2)", None), + ("LINESTRING (0 0, 1 1, 2 2)", None), + ("POLYGON EMPTY", 0), + ("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))", 0), + ( + "POLYGON ((0 0,6 0,6 6,0 6,0 0),(2 2,4 2,4 4,2 4,2 2))", + 1, + ), + ( + "POLYGON (" + "(0 0,10 0,10 6,0 6,0 0)," + "(1 1,2 1,2 5,1 5,1 1)," + "(8 5,8 4,9 4,9 5,8 5)" + ")", Review Comment: ```suggestion "POLYGON ((0 0,10 0,10 6,0 6,0 0), (1 1,2 1,2 5,1 5,1 1),(8 5,8 4,9 4,9 5,8 5))", ``` Again, I slightly prefer to have these all on one line for the ease of copy-pasting. There's one more multiline case that's right below this to fix. ########## python/sedonadb/tests/functions/test_functions.py: ########## @@ -2698,3 +2698,45 @@ def test_st_snap(eng, input, reference, tolerance, expected): def test_st_zmflag(eng, geom, expected): eng = eng.create_or_skip() eng.assert_query_result(f"SELECT ST_ZmFlag({geom_or_null(geom)})", expected) + + [email protected]("eng", [SedonaDB, PostGIS]) [email protected]( + ("geom", "expected"), + [ + (None, None), + ("POINT (1 2)", None), + ("LINESTRING (0 0, 1 1, 2 2)", None), + ("POLYGON EMPTY", 0), Review Comment: Great job with the tests so far 👏. 
I'd like the Python tests to be a bit more comprehensive because it's very possible we implement ways to speed up ST_NumInteriorRings in the future. When we do, it's important that we have good tests to make sure we don't introduce bugs. Here are the tests I'd like to have: - None (done) - Polygon with one ring (zero interior rings) (done) - Polygon with more rings (done) - Empty geom for each geometry type (not done) - One of each geometry type (not done) - All 7 of the geometry types are: `POINT`, `LINESTRING`, `POLYGON`, `MULTIPOINT`, `MULTILINESTRING`, `MULTIPOLYGON`, `GEOMETRYCOLLECTION`. You have most of these, but not quite all of them. You cover most of these (indicated by "done"), but not the last two bullets. Could you add cases for the last two bullets? Here's an example of how it's done for another function. You can use similar (or even the same) test cases, except your "expected" value will be different than the True/False values in that function. https://github.com/apache/sedona-db/blob/16d232be0f28441b85801936b270fa43f7edceb4/python/sedonadb/tests/functions/test_functions.py#L1276-L1290 optional: It's also nice to group the tests by bullet (e.g. all empty cases together), for readability. ########## c/sedona-geos/src/st_numinteriorrings.rs: ########## @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use crate::executor::GeosExecutor; +use arrow_array::builder::Int32Builder; +use arrow_schema::DataType; +use datafusion_common::{error::Result, DataFusionError}; +use datafusion_expr::ColumnarValue; +use geos::{Geom, Geometry, GeometryTypes}; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +pub fn st_num_interior_rings_impl() -> ScalarKernelRef { + Arc::new(STNumInteriorRings {}) +} + +#[derive(Debug)] +struct STNumInteriorRings {} + +impl SedonaScalarKernel for STNumInteriorRings { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Int32), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let executor = GeosExecutor::new(arg_types, args); + let mut builder = Int32Builder::with_capacity(executor.num_iterations()); + executor.execute_wkb_void(|maybe_geom| { + match maybe_geom { + None => builder.append_null(), + Some(geom) => { + let res = invoke_scalar(&geom)?; + match res { + Some(n) => builder.append_value(n), + // non-polygon / unsupported -> NULL (matches PostGIS + py tests) + None => builder.append_null(), + } + } + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar(geom: &Geometry) -> Result<Option<i32>> { + match geom.geometry_type() { + GeometryTypes::Polygon 
=> { + let is_empty = geom.is_empty().map_err(|e| { + DataFusionError::Execution(format!("Failed to check if geometry is empty: {e}")) + })?; + + if is_empty { + // empty polygon has no interior rings + Ok(Some(0)) + } else { + let count = geom.get_num_interior_rings().map_err(|e| { + DataFusionError::Execution(format!("Failed to get num interior rings: {e}")) + })?; + Ok(Some(count as i32)) + } + } + // non-polygon -> NULL + _ => Ok(None), + } +} +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow_array::{ArrayRef, Int32Array}; + use arrow_schema::DataType; + use datafusion_common::ScalarValue; + use rstest::rstest; + use sedona_expr::scalar_udf::SedonaScalarUDF; + use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; + use sedona_testing::compare::assert_array_equal; + use sedona_testing::testers::ScalarUdfTester; + + use super::*; + + #[rstest] + fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = SedonaScalarUDF::from_kernel("st_numinteriorrings", st_num_interior_rings_impl()); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + tester.assert_return_type(DataType::Int32); + + // Polygon with 2 interior rings -> 2 + let result = tester + .invoke_scalar( + "POLYGON( + (0 0,10 0,10 6,0 6,0 0), + (1 1,2 1,2 5,1 5,1 1), + (8 5,8 4,9 4,9 5,8 5) + )", + ) + .unwrap(); + tester.assert_scalar_result_equals(result, 2_i32); + + // NULL -> NULL + let result = tester.invoke_scalar(ScalarValue::Null).unwrap(); + assert!(result.is_null()); + + let input_wkt = vec![ + None, + Some("POINT (1 2)"), + Some("LINESTRING (0 0, 1 1, 2 2)"), + Some("POLYGON EMPTY"), + Some("POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))"), + Some("POLYGON ((0 0,6 0,6 6,0 6,0 0),(2 2,4 2,4 4,2 4,2 2))"), + Some( + "POLYGON ( + (0 0,10 0,10 6,0 6,0 0), + (1 1,2 1,2 5,1 5,1 1), + (8 5,8 4,9 4,9 5,8 5) + )", + ), Review Comment: For copy-paste ease (and conciseness), I prefer to collapse these rust ones to a single line too. 
Two more cases below this and one more at the beginning of the function -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
