joroKr21 commented on code in PR #11392:
URL: https://github.com/apache/datafusion/pull/11392#discussion_r1674371540


##########
datafusion/core/tests/user_defined/user_defined_scalar_functions.rs:
##########
@@ -1021,6 +1028,121 @@ async fn 
create_scalar_function_from_sql_statement_postgres_syntax() -> Result<(
     Ok(())
 }
 
+#[derive(Debug)]
+struct MyRegexUdf {
+    signature: Signature,
+    regex: Regex,
+}
+
+impl MyRegexUdf {
+    fn new(pattern: &str) -> Self {
+        Self {
+            signature: Signature::exact(vec![DataType::Utf8], 
Volatility::Immutable),
+            regex: Regex::new(pattern).expect("regex"),
+        }
+    }
+
+    fn matches(&self, value: Option<&str>) -> Option<bool> {
+        Some(self.regex.is_match(value?))
+    }
+}
+
+impl ScalarUDFImpl for MyRegexUdf {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "regex_udf"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+        if matches!(args, [DataType::Utf8]) {
+            Ok(DataType::Boolean)
+        } else {
+            plan_err!("regex_udf only accepts a Utf8 argument")
+        }
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        match args {
+            [ColumnarValue::Scalar(ScalarValue::Utf8(value))] => {
+                Ok(ColumnarValue::Scalar(ScalarValue::Boolean(
+                    self.matches(value.as_deref()),
+                )))
+            }
+            [ColumnarValue::Array(values)] => {
+                let mut builder = BooleanBuilder::with_capacity(values.len());
+                for value in values.as_string::<i32>() {
+                    builder.append_option(self.matches(value))
+                }
+                Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+            }
+            _ => exec_err!("regex_udf only accepts a Utf8 arguments"),
+        }
+    }
+
+    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
+        if let Some(other) = other.as_any().downcast_ref::<MyRegexUdf>() {
+            self.regex.as_str() == other.regex.as_str()
+        } else {
+            false
+        }
+    }
+
+    fn hash_value(&self) -> u64 {
+        let hasher = &mut DefaultHasher::new();
+        self.regex.as_str().hash(hasher);
+        hasher.finish()
+    }
+}
+
+#[tokio::test]
+async fn test_parameterized_scalar_udf() -> Result<()> {
+    let batch = RecordBatch::try_from_iter([(
+        "text",
+        Arc::new(StringArray::from(vec!["foo", "bar", "foobar", "barfoo"])) as 
ArrayRef,
+    )])?;
+
+    let ctx = SessionContext::new();
+    ctx.register_batch("t", batch)?;
+    let t = ctx.table("t").await?;
+    let foo_udf = ScalarUDF::from(MyRegexUdf::new("fo{2}"));
+    let bar_udf = ScalarUDF::from(MyRegexUdf::new("[Bb]ar"));
+
+    let plan = LogicalPlanBuilder::from(t.into_optimized_plan()?)
+        .filter(
+            foo_udf
+                .call(vec![col("text")])
+                .and(bar_udf.call(vec![col("text")])),
+        )?
+        .filter(col("text").is_not_null())?
+        .build()?;
+
+    assert_eq!(
+        format!("{plan:?}"),
+        "Filter: t.text IS NOT NULL\n  Filter: regex_udf(t.text) AND 
regex_udf(t.text)\n    TableScan: t projection=[text]"

Review Comment:
   This particular case is deduplicated in `PushDownFilter`:
   ```rust
               LogicalPlan::Filter(child_filter) => {
                   let parents_predicates = 
split_conjunction_owned(filter.predicate);
   
                   // remove duplicated filters
                   let child_predicates = 
split_conjunction_owned(child_filter.predicate);
                   let new_predicates = parents_predicates
                       .into_iter()
                       .chain(child_predicates)
                       // use IndexSet to remove dupes while preserving 
predicate order
                       .collect::<IndexSet<_>>()
                       .into_iter()
                       .collect::<Vec<_>>();
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to