Jefffrey commented on code in PR #17195:
URL: https://github.com/apache/datafusion/pull/17195#discussion_r2348023637
##########
datafusion/functions/src/regex/regexpreplace.rs:
##########
@@ -590,44 +613,109 @@ pub fn specialize_regexp_replace<T: OffsetSizeTrait>(
.map(|arg| arg.to_array(inferred_length))
.collect::<Result<Vec<_>>>()?;
- match args[0].data_type() {
- DataType::Utf8View => {
- let string_array = args[0].as_string_view();
- let pattern_array = args[1].as_string::<i32>();
- let replacement_array = args[2].as_string::<i32>();
- regexp_replace::<i32, _, _>(
- string_array,
- pattern_array,
- replacement_array,
- args.get(3),
- )
- }
- DataType::Utf8 => {
- let string_array = args[0].as_string::<i32>();
- let pattern_array = args[1].as_string::<i32>();
- let replacement_array = args[2].as_string::<i32>();
- regexp_replace::<i32, _, _>(
- string_array,
- pattern_array,
- replacement_array,
- args.get(3),
- )
- }
- DataType::LargeUtf8 => {
- let string_array = args[0].as_string::<i64>();
- let pattern_array = args[1].as_string::<i64>();
- let replacement_array = args[2].as_string::<i64>();
- regexp_replace::<i64, _, _>(
- string_array,
- pattern_array,
- replacement_array,
- args.get(3),
- )
+ if args.get(3).is_none() {
Review Comment:
We could fold the if into the match itself like so:
```rust
match (
args[0].data_type(),
args[1].data_type(),
args[2].data_type(),
args.get(3).map(|a| a.data_type()),
) {
(
DataType::Utf8,
DataType::Utf8,
DataType::Utf8,
Some(DataType::Utf8) | None,
) => {
let string_array = args[0].as_string::<i32>();
let pattern_array = args[1].as_string::<i32>();
let replacement_array = args[2].as_string::<i32>();
let flags_array = args.get(3).map(|a| a.as_string::<i32>());
regexp_replace::<i32, _>(
string_array,
pattern_array,
replacement_array,
flags_array,
)
}
(
DataType::Utf8View,
DataType::Utf8View,
DataType::Utf8View,
Some(DataType::Utf8View) | None,
) => {
let string_array = args[0].as_string_view();
let pattern_array = args[1].as_string_view();
let replacement_array = args[2].as_string_view();
let flags_array = args.get(3).map(|a| a.as_string_view());
regexp_replace::<i32, _>(
string_array,
pattern_array,
replacement_array,
flags_array,
)
}
(
DataType::LargeUtf8,
DataType::LargeUtf8,
DataType::LargeUtf8,
Some(DataType::LargeUtf8) | None,
) => {
let string_array = args[0].as_string::<i64>();
let pattern_array = args[1].as_string::<i64>();
let replacement_array = args[2].as_string::<i64>();
let flags_array = args.get(3).map(|a| a.as_string::<i64>());
regexp_replace::<i64, _>(
string_array,
pattern_array,
replacement_array,
flags_array,
)
}
other => {
exec_err!(
"Unsupported data type {other:?} for function regex_replace"
)
}
```
Removes the duplication across the if arms
##########
datafusion/functions/src/regex/regexpreplace.rs:
##########
@@ -94,14 +98,30 @@ impl Default for RegexpReplaceFunc {
impl RegexpReplaceFunc {
pub fn new() -> Self {
- use DataType::*;
+ use TypeSignature::*;
+ use TypeSignatureClass::*;
Self {
signature: Signature::one_of(
vec![
- TypeSignature::Exact(vec![Utf8, Utf8, Utf8]),
Review Comment:
I think this is an option too?
```rust
Uniform(3, vec![Utf8View, LargeUtf8, Utf8]),
Uniform(4, vec![Utf8View, LargeUtf8, Utf8]),
```
I tested it on a checked out copy of this PR and the slt tests passed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]