petern48 commented on code in PR #241:
URL: https://github.com/apache/sedona-db/pull/241#discussion_r2471687603


##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -54,65 +67,216 @@ impl SedonaScalarKernel for STBuffer {
         arg_types: &[SedonaType],
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
-        // Default params
-        let params_builder = BufferParams::builder();
+        invoke_batch_impl(arg_types, args)
+    }
+}
 
-        let params = params_builder
-            .build()
-            .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
-        // Extract the constant scalar value before looping over the input 
geometries
-        let distance: Option<f64>;
-        let arg1 = args[1].cast_to(&DataType::Float64, None)?;
-        if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
-            if scalar_arg.is_null() {
-                distance = None;
-            } else {
-                distance = Some(f64::try_from(scalar_arg.clone())?);
-            }
-        } else {
-            return Err(DataFusionError::Execution(format!(
-                "Invalid distance: {:?}",
-                args[1]
-            )));
-        }
+pub fn st_buffer_style_impl() -> ScalarKernelRef {
+    Arc::new(STBufferStyle {})
+}
+#[derive(Debug)]
+struct STBufferStyle {}
 
-        let executor = GeosExecutor::new(arg_types, args);
-        let mut builder = BinaryBuilder::with_capacity(
-            executor.num_iterations(),
-            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+impl SedonaScalarKernel for STBufferStyle {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![
+                ArgMatcher::is_geometry(),
+                ArgMatcher::is_numeric(),
+                ArgMatcher::is_string(),
+            ],
+            WKB_GEOMETRY,
         );
-        executor.execute_wkb_void(|wkb| {
-            match (wkb, distance) {
-                (Some(wkb), Some(distance)) => {
-                    invoke_scalar(&wkb, distance, &params, &mut builder)?;
-                    builder.append_value([]);
-                }
-                _ => builder.append_null(),
-            }
 
-            Ok(())
-        })?;
+        matcher.match_args(args)
+    }
 
-        executor.finish(Arc::new(builder.finish()))
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        invoke_batch_impl(arg_types, args)
     }
 }
 
+fn invoke_batch_impl(arg_types: &[SedonaType], args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
+    let executor = GeosExecutor::new(arg_types, args);
+    let mut builder = BinaryBuilder::with_capacity(
+        executor.num_iterations(),
+        WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+    );
+
+    // Extract Args
+    let distance_value = args[1]
+        .cast_to(&DataType::Float64, None)?
+        .to_array(executor.num_iterations())?;
+    let distance_array = as_float64_array(&distance_value)?;
+    let mut distance_iter = distance_array.iter();
+
+    let buffer_style_params = extract_optional_string(args.get(2))?;
+
+    // Build BufferParams based on style parameters
+    let params = parse_buffer_params(buffer_style_params.as_deref())?;
+
+    let is_single_sided = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("side="))
+        .unwrap_or(false);
+
+    let is_left = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("left"))
+        .unwrap_or(false);
+
+    let is_right = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("right"))
+        .unwrap_or(false);
+
+    executor.execute_wkb_void(|wkb| {
+        match (wkb, distance_iter.next().unwrap()) {
+            (Some(wkb), Some(mut distance)) => {
+                if is_single_sided && ((is_left && distance < 0.0) || 
(is_right && distance > 0.0))

Review Comment:
   ```suggestion
                   if (is_left && distance < 0.0) || (is_right && distance > 
0.0)
   ```
   
   `is_single_sided` here is unnecessary since it's guaranteed to be true if 
`is_left` or `is_right` here is true.



##########
python/sedonadb/tests/functions/test_functions.py:
##########
@@ -176,6 +176,115 @@ def test_st_buffer(eng, geom, dist, expected_area):
     )
 
 
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("geom", "dist", "buffer_style_parameters", "expected_area"),
+    [
+        (None, None, None, None),
+        ("POINT(100 90)", 50, "'quad_segs=8'", 7803.612880645131),
+        (
+            "LINESTRING(50 50,150 150,150 50)",
+            10,
+            "'endcap=round join=round'",
+            5016.204476944362,
+        ),
+        (
+            "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))",
+            2,
+            "'join=miter'",
+            196.0,
+        ),
+        (
+            "LINESTRING(0 0, 10 0)",
+            5,
+            "'endcap=square'",
+            200.0,
+        ),
+        (
+            "POINT(0 0)",
+            10,
+            "'quad_segs=4'",
+            306.1467458920718,
+        ),
+        (
+            "POINT(0 0)",
+            10,
+            "'quad_segs=16'",
+            313.654849054594,
+        ),
+        (
+            "LINESTRING(0 0, 100 0, 100 100)",
+            5,
+            "'join=bevel'",
+            2065.536128806451,
+        ),
+        (
+            "LINESTRING(0 0, 50 0)",
+            10,
+            "'endcap=flat'",
+            1000.0,
+        ),
+        (
+            "POLYGON((0 0, 0 20, 20 20, 20 0, 0 0))",
+            -2,
+            "'join=round'",
+            256.0,
+        ),
+        (
+            "POLYGON((0 0, 0 100, 100 100, 100 0, 0 0), (20 20, 20 80, 80 80, 
80 20, 20 20))",
+            5,
+            "'join=round quad_segs=4'",
+            9576.536686473019,
+        ),
+        (
+            "MULTIPOINT((10 10), (30 30))",
+            5,
+            "'quad_segs=8'",
+            156.0722576129026,
+        ),
+        (
+            "GEOMETRYCOLLECTION(POINT(10 10), LINESTRING(50 50, 60 60))",
+            3,
+            "'endcap=round join=round'",
+            141.0388264830308,
+        ),
+        (
+            "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))",
+            0,
+            "'join=miter'",
+            100.0,
+        ),
+        (
+            "POINT(0 0)",
+            0.1,
+            "'quad_segs=8'",
+            0.031214451522580514,
+        ),
+        (
+            "LINESTRING(0 0, 50 0, 50 50)",
+            10,
+            "'join=miter miter_limit=2'",
+            2312.1445152258043,
+        ),
+        (
+            "LINESTRING(0 0, 0 100)",
+            10,
+            "'side=left'",
+            1000.0,
+        ),

Review Comment:
   Wow. You did an amazing job investigating. Glad to here you figured out what 
the other mystery was. I see [here](https://hub.docker.com/r/postgis/postgis) 
that there are indeed Docker images that support `geos` 13. I gave 
`postgis/postgis:18-3.6` a try locally, and the tests pass. It's the only one 
that falls under the ([recommended debian 
section](https://hub.docker.com/r/postgis/postgis#debian-based-recommended)). 
How about you just update it 
[here](https://github.com/apache/sedona-db/blob/0551583a675da1cf5cc61cb7c7a61352051f1fc6/compose.yml#L20).
   
   ```
   image: postgis/postgis:18-3.6
   ```
   
   WDYT @paleolimbot, is this a reasonable change? It ensures our `geos` 
version matches the container's. They say "This Docker-PostGIS image has a 
cautious release cycle to guarantee high stability", so it seems safe to use to 
me.



##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -54,65 +67,216 @@ impl SedonaScalarKernel for STBuffer {
         arg_types: &[SedonaType],
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
-        // Default params
-        let params_builder = BufferParams::builder();
+        invoke_batch_impl(arg_types, args)
+    }
+}
 
-        let params = params_builder
-            .build()
-            .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
-        // Extract the constant scalar value before looping over the input 
geometries
-        let distance: Option<f64>;
-        let arg1 = args[1].cast_to(&DataType::Float64, None)?;
-        if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
-            if scalar_arg.is_null() {
-                distance = None;
-            } else {
-                distance = Some(f64::try_from(scalar_arg.clone())?);
-            }
-        } else {
-            return Err(DataFusionError::Execution(format!(
-                "Invalid distance: {:?}",
-                args[1]
-            )));
-        }
+pub fn st_buffer_style_impl() -> ScalarKernelRef {
+    Arc::new(STBufferStyle {})
+}
+#[derive(Debug)]
+struct STBufferStyle {}
 
-        let executor = GeosExecutor::new(arg_types, args);
-        let mut builder = BinaryBuilder::with_capacity(
-            executor.num_iterations(),
-            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+impl SedonaScalarKernel for STBufferStyle {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![
+                ArgMatcher::is_geometry(),
+                ArgMatcher::is_numeric(),
+                ArgMatcher::is_string(),
+            ],
+            WKB_GEOMETRY,
         );
-        executor.execute_wkb_void(|wkb| {
-            match (wkb, distance) {
-                (Some(wkb), Some(distance)) => {
-                    invoke_scalar(&wkb, distance, &params, &mut builder)?;
-                    builder.append_value([]);
-                }
-                _ => builder.append_null(),
-            }
 
-            Ok(())
-        })?;
+        matcher.match_args(args)
+    }
 
-        executor.finish(Arc::new(builder.finish()))
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        invoke_batch_impl(arg_types, args)
     }
 }
 
+fn invoke_batch_impl(arg_types: &[SedonaType], args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
+    let executor = GeosExecutor::new(arg_types, args);
+    let mut builder = BinaryBuilder::with_capacity(
+        executor.num_iterations(),
+        WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+    );
+
+    // Extract Args
+    let distance_value = args[1]
+        .cast_to(&DataType::Float64, None)?
+        .to_array(executor.num_iterations())?;
+    let distance_array = as_float64_array(&distance_value)?;
+    let mut distance_iter = distance_array.iter();
+
+    let buffer_style_params = extract_optional_string(args.get(2))?;
+
+    // Build BufferParams based on style parameters
+    let params = parse_buffer_params(buffer_style_params.as_deref())?;
+
+    let is_single_sided = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("side="))
+        .unwrap_or(false);
+

Review Comment:
   ```suggestion
   ```
   
   This condition for `is_single_sided` isn't quite right. If we put 
`side=both`, this would incorrectly return true. Looking a few lines below. 
There's only one place where this variable is used, and it's unnecessary (the 
rest of the condition guarantees correctness for us), so let's remove this 
variable.



##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -54,65 +67,216 @@ impl SedonaScalarKernel for STBuffer {
         arg_types: &[SedonaType],
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
-        // Default params
-        let params_builder = BufferParams::builder();
+        invoke_batch_impl(arg_types, args)
+    }
+}
 
-        let params = params_builder
-            .build()
-            .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
-        // Extract the constant scalar value before looping over the input 
geometries
-        let distance: Option<f64>;
-        let arg1 = args[1].cast_to(&DataType::Float64, None)?;
-        if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
-            if scalar_arg.is_null() {
-                distance = None;
-            } else {
-                distance = Some(f64::try_from(scalar_arg.clone())?);
-            }
-        } else {
-            return Err(DataFusionError::Execution(format!(
-                "Invalid distance: {:?}",
-                args[1]
-            )));
-        }
+pub fn st_buffer_style_impl() -> ScalarKernelRef {
+    Arc::new(STBufferStyle {})
+}
+#[derive(Debug)]
+struct STBufferStyle {}
 
-        let executor = GeosExecutor::new(arg_types, args);
-        let mut builder = BinaryBuilder::with_capacity(
-            executor.num_iterations(),
-            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+impl SedonaScalarKernel for STBufferStyle {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![
+                ArgMatcher::is_geometry(),
+                ArgMatcher::is_numeric(),
+                ArgMatcher::is_string(),
+            ],
+            WKB_GEOMETRY,
         );
-        executor.execute_wkb_void(|wkb| {
-            match (wkb, distance) {
-                (Some(wkb), Some(distance)) => {
-                    invoke_scalar(&wkb, distance, &params, &mut builder)?;
-                    builder.append_value([]);
-                }
-                _ => builder.append_null(),
-            }
 
-            Ok(())
-        })?;
+        matcher.match_args(args)
+    }
 
-        executor.finish(Arc::new(builder.finish()))
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        invoke_batch_impl(arg_types, args)
     }
 }
 
+fn invoke_batch_impl(arg_types: &[SedonaType], args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
+    let executor = GeosExecutor::new(arg_types, args);
+    let mut builder = BinaryBuilder::with_capacity(
+        executor.num_iterations(),
+        WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+    );
+
+    // Extract Args
+    let distance_value = args[1]
+        .cast_to(&DataType::Float64, None)?
+        .to_array(executor.num_iterations())?;
+    let distance_array = as_float64_array(&distance_value)?;
+    let mut distance_iter = distance_array.iter();
+
+    let buffer_style_params = extract_optional_string(args.get(2))?;
+
+    // Build BufferParams based on style parameters
+    let params = parse_buffer_params(buffer_style_params.as_deref())?;
+
+    let is_single_sided = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("side="))
+        .unwrap_or(false);
+
+    let is_left = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("left"))
+        .unwrap_or(false);
+
+    let is_right = buffer_style_params
+        .as_ref()
+        .map(|s| s.eq_ignore_ascii_case("right"))
+        .unwrap_or(false);
+
+    executor.execute_wkb_void(|wkb| {
+        match (wkb, distance_iter.next().unwrap()) {
+            (Some(wkb), Some(mut distance)) => {
+                if is_single_sided && ((is_left && distance < 0.0) || 
(is_right && distance > 0.0))
+                {
+                    distance = -distance;
+                }
+                builder.append_value(invoke_scalar(&wkb, distance, &params)?);
+            }
+            _ => builder.append_null(),
+        }
+        Ok(())
+    })?;
+
+    executor.finish(Arc::new(builder.finish()))
+}
+
 fn invoke_scalar(
     geos_geom: &geos::Geometry,
     distance: f64,
     params: &BufferParams,
-    writer: &mut impl std::io::Write,
-) -> Result<()> {
+) -> Result<Vec<u8>> {
     let geometry = geos_geom
         .buffer_with_params(distance, params)
         .map_err(|e| DataFusionError::External(Box::new(e)))?;
     let wkb = geometry
         .to_wkb()
         .map_err(|e| DataFusionError::Execution(format!("Failed to convert to 
wkb: {e}")))?;
 
-    writer.write_all(wkb.as_ref())?;
-    Ok(())

Review Comment:
   While it looks odd, we actually want to keep the pattern here where we write 
the bytes directly into the `writer` buffer instead of returning it from the 
function. It's faster this way. We follow this pattern for any function that 
returns a geometry object. See my other comment above in the file to see the 
other half of this to reverse. I originally did the same thing when I first 
wrote this function, but dewey caught me.
   
   Here's what the caller code should look like. I'm referencing the existing 
code (before this PR) here.
   
   
https://github.com/apache/sedona-db/blob/0551583a675da1cf5cc61cb7c7a61352051f1fc6/c/sedona-geos/src/st_buffer.rs#L88-L89



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to