paleolimbot commented on code in PR #498:
URL: https://github.com/apache/sedona-db/pull/498#discussion_r2670932273


##########
rust/sedona-functions/src/st_geomfromwkt.rs:
##########
@@ -233,6 +366,64 @@ mod tests {
         );
     }
 
+    #[rstest]
+    fn ewkt(#[values(DataType::Utf8, DataType::Utf8View)] data_type: DataType) 
{
+        let udf = st_geomfromewkt_udf();
+        let tester = ScalarUdfTester::new(udf.into(), 
vec![SedonaType::Arrow(data_type.clone())]);
+        
tester.assert_return_type(SedonaType::new_item_crs(&WKB_GEOMETRY).unwrap());
+
+        assert_scalar_equal(
+            &tester.invoke_scalar("SRID=4326;POINT (1 2)").unwrap(),
+            &create_scalar_item_crs(Some("POINT (1 2)"), Some("OGC:CRS84"), 
&WKB_GEOMETRY),
+        );
+
+        assert_scalar_equal(
+            &tester.invoke_scalar("SRID=3857;POINT (1 2)").unwrap(),
+            &create_scalar_item_crs(Some("POINT (1 2)"), Some("EPSG:3857"), 
&WKB_GEOMETRY),
+        );
+
+        assert_scalar_equal(
+            &tester.invoke_scalar("POINT (3 4)").unwrap(),
+            &create_scalar_item_crs(Some("POINT (3 4)"), None, &WKB_GEOMETRY),
+        );
+
+        let array_in: ArrayRef = match data_type {
+            DataType::Utf8 => arrow_array::create_array!(
+                Utf8,
+                [
+                    Some("SRID=4326;POINT (1 2)"),
+                    Some("SRID=3857;POINT (1 2)"),
+                    None,
+                    Some("POINT (3 4)"),
+                    Some("SRID=0;POINT (5 6)")
+                ]
+            ) as ArrayRef,
+            DataType::Utf8View => arrow_array::create_array!(
+                Utf8View,
+                [
+                    Some("SRID=4326;POINT (1 2)"),
+                    Some("SRID=3857;POINT (1 2)"),
+                    None,
+                    Some("POINT (3 4)"),
+                    Some("SRID=0;POINT (5 6)")
+                ]
+            ) as ArrayRef,

Review Comment:
   I don't think you need the repeated case here (I'm pretty sure 
`tester.invoke_array()` will do that cast for you).



##########
rust/sedona-functions/src/st_geomfromwkt.rs:
##########
@@ -147,8 +149,139 @@ fn invoke_scalar(wkt_bytes: &str, builder: &mut 
BinaryBuilder) -> Result<()> {
     .map_err(|err| DataFusionError::Internal(format!("WKB write error: 
{err}")))
 }
 
+/// ST_GeomFromEWKT() UDF implementation
+///
+/// An implementation of EWKT reading using GeoRust's wkt crate.
+pub fn st_geomfromewkt_udf() -> SedonaScalarUDF {
+    let doc = Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Construct a Geometry from EWKT",
+        "ST_GeomFromEWKT (Ewkt: String)",
+    )
+    .with_argument(
+        "EWKT",
+        "string: Extended well-known text representation of the geometry",
+    )
+    .with_sql_example("SELECT ST_GeomFromEWKT('SRID=4326;POINT(40.7128 
-74.0060)')")
+    .build();
+
+    SedonaScalarUDF::new(
+        "st_geomfromewkt",
+        vec![Arc::new(STGeoFromEWKT {})],
+        Volatility::Immutable,
+        Some(doc),
+    )
+}
+
+#[derive(Debug)]
+struct STGeoFromEWKT {
+    // TODO
+    // engine: Option<Arc<dyn CrsEngine + Send + Sync>>,
+}
+
+impl SedonaScalarKernel for STGeoFromEWKT {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_string()],
+            SedonaType::new_item_crs(&WKB_GEOMETRY)?,
+        );
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let arg_array = args[0]
+            .cast_to(&DataType::Utf8View, None)?
+            .to_array(executor.num_iterations())?;
+
+        let mut geom_builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+        let mut srid_builder = 
StringViewBuilder::with_capacity(executor.num_iterations());
+
+        for item in as_string_view_array(&arg_array)? {
+            if let Some(ewkt_bytes) = item {
+                match ewkt_bytes.split_once(";") {
+                    Some((maybe_srid, wkt_bytes)) => {
+                        let srid = parse_maybe_srid(maybe_srid);
+                        invoke_scalar_with_srid(
+                            wkt_bytes,
+                            srid,
+                            &mut geom_builder,
+                            &mut srid_builder,
+                        )?;
+                    }
+                    None => {
+                        invoke_scalar_with_srid(
+                            ewkt_bytes,
+                            None,
+                            &mut geom_builder,
+                            &mut srid_builder,
+                        )?;
+                    }
+                }
+                geom_builder.append_value(vec![]);
+            } else {
+                geom_builder.append_null();
+                srid_builder.append_null();
+            }
+        }
+
+        let new_geom_array = geom_builder.finish();
+        let item_result = executor.finish(Arc::new(new_geom_array))?;
+
+        let new_srid_array = srid_builder.finish();
+        let crs_value = if matches!(&item_result, ColumnarValue::Scalar(_)) {
+            ColumnarValue::Scalar(ScalarValue::try_from_array(&new_srid_array, 
0)?)
+        } else {
+            ColumnarValue::Array(Arc::new(new_srid_array))
+        };
+
+        make_item_crs(&WKB_GEOMETRY, item_result, &crs_value, None)
+    }
+}
+
+fn parse_maybe_srid(maybe_srid: &str) -> Option<String> {
+    if !maybe_srid.starts_with("SRID=") {
+        return None;
+    }
+    let srid_str = &maybe_srid[5..];
+    let srid: u16 = match srid_str.parse() {
+        Ok(srid) => srid,
+        Err(_) => return None,
+    };

Review Comment:
   Can this be a U32?



##########
rust/sedona-functions/src/st_geomfromwkt.rs:
##########
@@ -147,8 +149,139 @@ fn invoke_scalar(wkt_bytes: &str, builder: &mut 
BinaryBuilder) -> Result<()> {
     .map_err(|err| DataFusionError::Internal(format!("WKB write error: 
{err}")))
 }
 
+/// ST_GeomFromEWKT() UDF implementation
+///
+/// An implementation of EWKT reading using GeoRust's wkt crate.
+pub fn st_geomfromewkt_udf() -> SedonaScalarUDF {
+    let doc = Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Construct a Geometry from EWKT",
+        "ST_GeomFromEWKT (Ewkt: String)",
+    )
+    .with_argument(
+        "EWKT",
+        "string: Extended well-known text representation of the geometry",
+    )
+    .with_sql_example("SELECT ST_GeomFromEWKT('SRID=4326;POINT(40.7128 
-74.0060)')")
+    .build();
+
+    SedonaScalarUDF::new(
+        "st_geomfromewkt",
+        vec![Arc::new(STGeoFromEWKT {})],
+        Volatility::Immutable,
+        Some(doc),
+    )
+}
+
+#[derive(Debug)]
+struct STGeoFromEWKT {
+    // TODO
+    // engine: Option<Arc<dyn CrsEngine + Send + Sync>>,

Review Comment:
   Feel free to leave this out for now...the CRS engine variants of some of the 
other functions aren't currently used with an engine and we probably need a 
more reliable way to inject support for validation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to