alamb commented on code in PR #17985:
URL: https://github.com/apache/datafusion/pull/17985#discussion_r2421530505
##########
datafusion/physical-expr/src/expressions/binary/kernels.rs:
##########
@@ -164,3 +172,129 @@ pub fn concat_elements_utf8view(
}
Ok(result.finish())
}
+
+/// Invoke a compute kernel on a pair of binary data arrays with flags
+macro_rules! regexp_is_match_flag {
+ ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
+ let ll = $LEFT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+ let rr = $RIGHT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+
+ let flag = if $FLAG {
+ Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
+ } else {
+ None
+ };
+ let mut array = regexp_is_match(ll, rr, flag.as_ref())?;
+ if $NOT {
+ array = not(&array).unwrap();
+ }
+ Ok(Arc::new(array))
+ }};
+}
+
+pub(crate) fn regex_match_dyn(
+ left: ArrayRef,
+ right: ArrayRef,
+ not_match: bool,
+ flag: bool,
+) -> Result<ArrayRef> {
+ match left.data_type() {
+ DataType::Utf8 => {
+ regexp_is_match_flag!(left, right, StringArray, not_match, flag)
+ },
+ DataType::Utf8View => {
+ regexp_is_match_flag!(left, right, StringViewArray, not_match,
flag)
+ }
+ DataType::LargeUtf8 => {
+ regexp_is_match_flag!(left, right, LargeStringArray, not_match,
flag)
+ },
+ other => internal_err!(
+ "Data type {} not supported for binary_string_array_flag_op
operation regexp_is_match on string array",
Review Comment:
This error message should probably be updated to refer to the new function,
`regex_match_dyn` (I realize this message just got moved around).
##########
datafusion/physical-expr/src/expressions/binary/kernels.rs:
##########
@@ -164,3 +172,129 @@ pub fn concat_elements_utf8view(
}
Ok(result.finish())
}
+
+/// Invoke a compute kernel on a pair of binary data arrays with flags
+macro_rules! regexp_is_match_flag {
+ ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
+ let ll = $LEFT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+ let rr = $RIGHT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+
+ let flag = if $FLAG {
+ Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
+ } else {
+ None
+ };
+ let mut array = regexp_is_match(ll, rr, flag.as_ref())?;
+ if $NOT {
+ array = not(&array).unwrap();
+ }
+ Ok(Arc::new(array))
+ }};
+}
+
+pub(crate) fn regex_match_dyn(
+ left: ArrayRef,
+ right: ArrayRef,
+ not_match: bool,
+ flag: bool,
+) -> Result<ArrayRef> {
+ match left.data_type() {
+ DataType::Utf8 => {
+ regexp_is_match_flag!(left, right, StringArray, not_match, flag)
+ },
+ DataType::Utf8View => {
+ regexp_is_match_flag!(left, right, StringViewArray, not_match,
flag)
+ }
+ DataType::LargeUtf8 => {
+ regexp_is_match_flag!(left, right, LargeStringArray, not_match,
flag)
+ },
+ other => internal_err!(
+ "Data type {} not supported for binary_string_array_flag_op
operation regexp_is_match on string array",
+ other
+ ),
+ }
+}
+
+/// Invoke a compute kernel on a data array and a scalar value with flag
+macro_rules! regexp_is_match_flag_scalar {
+ ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
+ let ll = $LEFT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+
+ let string_value = match $RIGHT.try_as_str() {
+ Some(Some(string_value)) => string_value,
+ // null literal or non string
+ _ => {
+ return Some(internal_err!(
+ "failed to cast literal value {} for operation
'regexp_is_match_scalar'",
+ $RIGHT
+ ))
+ }
+ };
+
+ let flag = $FLAG.then_some("i");
+ match regexp_is_match_scalar(ll, &string_value, flag) {
+ Ok(mut array) => {
+ if $NOT {
+ array = not(&array).unwrap();
+ }
+ Ok(Arc::new(array))
+ }
+ Err(e) => internal_err!("failed to call regexp_is_match_scalar
{}", e),
+ }
+ }};
+}
+
+pub(crate) fn regex_match_dyn_scalar(
+ left: &dyn Array,
+ right: ScalarValue,
+ not_match: bool,
+ flag: bool,
+) -> Option<Result<ArrayRef>> {
+ let result: Result<ArrayRef> = match left.data_type() {
+ DataType::Utf8 => {
+ regexp_is_match_flag_scalar!(left, right, StringArray, not_match,
flag)
+ },
+ DataType::Utf8View => {
+ regexp_is_match_flag_scalar!(left, right, StringViewArray,
not_match, flag)
+ }
+ DataType::LargeUtf8 => {
+ regexp_is_match_flag_scalar!(left, right, LargeStringArray,
not_match, flag)
+ },
+ DataType::Dictionary(_, _) => {
+ let values = left.as_any_dictionary().values();
+
+ match values.data_type() {
+ DataType::Utf8 => regexp_is_match_flag_scalar!(values, right,
StringArray, not_match, flag),
+ DataType::Utf8View => regexp_is_match_flag_scalar!(values,
right, StringViewArray, not_match, flag),
+ DataType::LargeUtf8 => regexp_is_match_flag_scalar!(values,
right, LargeStringArray, not_match, flag),
+ other => internal_err!(
+ "Data type {} not supported as a dictionary value type for
binary_string_array_flag_op_scalar operation 'regexp_is_match_scalar' on string
array",
+ other
+ ),
+ }.map(
+ // downcast_dictionary_array duplicates code per possible key
type, so we aim to do all prep work before
Review Comment:
Eventually it would be nicer to preserve the dictionary encoding (but I
realize this PR doesn't change the unpacking pattern here).
##########
datafusion/physical-expr/src/expressions/binary/kernels.rs:
##########
@@ -164,3 +172,129 @@ pub fn concat_elements_utf8view(
}
Ok(result.finish())
}
+
+/// Invoke a compute kernel on a pair of binary data arrays with flags
+macro_rules! regexp_is_match_flag {
+ ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
+ let ll = $LEFT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+ let rr = $RIGHT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+
+ let flag = if $FLAG {
+ Some($ARRAYTYPE::from(vec!["i"; ll.len()]))
+ } else {
+ None
+ };
+ let mut array = regexp_is_match(ll, rr, flag.as_ref())?;
+ if $NOT {
+ array = not(&array).unwrap();
+ }
+ Ok(Arc::new(array))
+ }};
+}
+
+pub(crate) fn regex_match_dyn(
+ left: ArrayRef,
+ right: ArrayRef,
+ not_match: bool,
+ flag: bool,
+) -> Result<ArrayRef> {
+ match left.data_type() {
+ DataType::Utf8 => {
+ regexp_is_match_flag!(left, right, StringArray, not_match, flag)
+ },
+ DataType::Utf8View => {
+ regexp_is_match_flag!(left, right, StringViewArray, not_match,
flag)
+ }
+ DataType::LargeUtf8 => {
+ regexp_is_match_flag!(left, right, LargeStringArray, not_match,
flag)
+ },
+ other => internal_err!(
+ "Data type {} not supported for binary_string_array_flag_op
operation regexp_is_match on string array",
+ other
+ ),
+ }
+}
+
+/// Invoke a compute kernel on a data array and a scalar value with flag
+macro_rules! regexp_is_match_flag_scalar {
+ ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
+ let ll = $LEFT
+ .as_any()
+ .downcast_ref::<$ARRAYTYPE>()
+ .expect("failed to downcast array");
+
+ let string_value = match $RIGHT.try_as_str() {
+ Some(Some(string_value)) => string_value,
+ // null literal or non string
+ _ => {
+ return Some(internal_err!(
+ "failed to cast literal value {} for operation
'regexp_is_match_scalar'",
+ $RIGHT
+ ))
+ }
+ };
Review Comment:
Minor -- I think you can write this more succinctly with something like
this (untested):
```rust
let Some(Some(string_value)) = $RIGHT.try_as_str() else {
// null literal or non string
return Some(internal_err!(
"failed to cast literal value {} for operation
'regexp_is_match_scalar'",
$RIGHT
))
};
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]