paleolimbot commented on code in PR #275:
URL: https://github.com/apache/sedona-db/pull/275#discussion_r2507430219
##########
rust/sedona-functions/src/st_setsrid.rs:
##########
@@ -227,6 +230,105 @@ fn determine_return_type(
sedona_internal_err!("Unexpected argument types: {}, {}", args[0], args[1])
}
+/// [SedonaScalarKernel] wrapper that handles the SRID argument for
constructors like ST_Point
+#[derive(Debug)]
+pub(crate) struct SRIDifiedKernel {
+ inner: ScalarKernelRef,
+}
+
+impl SRIDifiedKernel {
+ pub(crate) fn new(inner: ScalarKernelRef) -> Self {
+ Self { inner }
+ }
+}
+
+impl SedonaScalarKernel for SRIDifiedKernel {
+ fn return_type_from_args_and_scalars(
+ &self,
+ args: &[SedonaType],
+ scalar_args: &[Option<&ScalarValue>],
+ ) -> Result<Option<SedonaType>> {
+ // args should consist of the original args and one extra arg for
+ // specifying CRS. So, first, validate the length and separate these.
+ //
+ // [arg0, arg1, ..., crs_arg];
+ // ^^^^^^^^^^^^^^^
+ // orig_args
+ let orig_args_len = match (args.len(), scalar_args.len()) {
+ (0, 0) => return Ok(None),
+ (l1, l2) if l1 == l2 => l1 - 1,
+ _ => return sedona_internal_err!("Arg types and arg values have
different lengths"),
+ };
+
+ let orig_args = &args[..orig_args_len];
+ let orig_scalar_args = &scalar_args[..orig_args_len];
Review Comment:
I think I understand this better now...this works, although probably `if
args.len() == 0 { return Ok(None) }` is sufficient (there are a lot of places
in our code where we rely on DataFusion passing us the right number of things).
I was worried before that if somebody passed (e.g.,) a single argument to
`ST_Point()` something funny would happen here, but I see now that the call to
the `inner.return_type_from_args_and_scalars()` will return correctly return
`Ok(None)` for that case.
Totally optional, but the error message for something like
`ST_Point('gazornenplat')` would probably be better if you moved the call to
`inner.return_type_from_args_and_scalars()` before the CRS parsing.
##########
rust/sedona-functions/src/st_setsrid.rs:
##########
@@ -227,6 +230,105 @@ fn determine_return_type(
sedona_internal_err!("Unexpected argument types: {}, {}", args[0], args[1])
}
+/// [SedonaScalarKernel] wrapper that handles the SRID argument for
constructors like ST_Point
+#[derive(Debug)]
+pub(crate) struct SRIDifiedKernel {
+ inner: ScalarKernelRef,
+}
+
+impl SRIDifiedKernel {
+ pub(crate) fn new(inner: ScalarKernelRef) -> Self {
+ Self { inner }
+ }
+}
+
+impl SedonaScalarKernel for SRIDifiedKernel {
+ fn return_type_from_args_and_scalars(
+ &self,
+ args: &[SedonaType],
+ scalar_args: &[Option<&ScalarValue>],
+ ) -> Result<Option<SedonaType>> {
+ // args should consist of the original args and one extra arg for
+ // specifying CRS. So, first, validate the length and separate these.
+ //
+ // [arg0, arg1, ..., crs_arg];
+ // ^^^^^^^^^^^^^^^
+ // orig_args
+ let orig_args_len = match (args.len(), scalar_args.len()) {
+ (0, 0) => return Ok(None),
+ (l1, l2) if l1 == l2 => l1 - 1,
+ _ => return sedona_internal_err!("Arg types and arg values have
different lengths"),
+ };
+
+ let orig_args = &args[..orig_args_len];
+ let orig_scalar_args = &scalar_args[..orig_args_len];
+
+ let crs = match scalar_args[orig_args_len] {
+ Some(crs) => crs,
+ None => return Ok(None),
+ };
+ let new_crs = match crs.cast_to(&DataType::Utf8) {
+ Ok(ScalarValue::Utf8(Some(crs))) => {
+ if crs == "0" {
+ None
+ } else {
+ validate_crs(&crs, None)?;
+ deserialize_crs(&serde_json::Value::String(crs))?
+ }
+ }
+ Ok(ScalarValue::Utf8(None)) => None,
+ Ok(_) | Err(_) => return sedona_internal_err!("Can't cast Crs
{crs:?} to Utf8"),
+ };
+
+ let mut result = self
+ .inner
+ .return_type_from_args_and_scalars(orig_args, orig_scalar_args);
+ if let Ok(Some(sedona_type)) = &mut result {
+ match sedona_type {
+ SedonaType::Wkb(_, crs) => *crs = new_crs,
+ SedonaType::WkbView(_, crs) => *crs = new_crs,
+ _ => {
+ return sedona_internal_err!("Return type must be Wkb or
WkbView");
+ }
+ }
+ }
+
+ result
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let orig_args_len = arg_types.len() - 1;
+ let orig_arg_types = &arg_types[..orig_args_len];
+ let orig_args = &args[..orig_args_len];
+
+ // If the specified SRID is NULL, the result is also NULL. So, return
+ // NULL early and doesn't run `invoke_batch()`.
+ if let ColumnarValue::Scalar(sc) = &args[orig_args_len] {
Review Comment:
I think your last point is a good one...we should probably `invoke_batch`
first no matter what (to propagate any errors). I don't think that anybody is
relying the performance of returning a column full of nulls because they
probably made a mistake 🙂
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]