alamb commented on code in PR #16333:
URL: https://github.com/apache/datafusion/pull/16333#discussion_r2135784803


##########
datafusion/functions-nested/src/array_has.rs:
##########
@@ -232,98 +236,244 @@ fn array_has_inner_for_array(haystack: &ArrayRef, 
needle: &ArrayRef) -> Result<A
     match haystack.data_type() {
         DataType::List(_) => array_has_dispatch_for_array::<i32>(haystack, 
needle),
         DataType::LargeList(_) => 
array_has_dispatch_for_array::<i64>(haystack, needle),
+        DataType::FixedSizeList(_, _) => {
+            array_has_dispatch_for_fixed_size_list_array(haystack, needle)
+        }
         _ => exec_err!(
             "array_has does not support type '{:?}'.",
             haystack.data_type()
         ),
     }
 }
 
+macro_rules! array_has_dispatch_for_array {
+    ($haystack:expr, $needle:expr) => {{
+        let mut boolean_builder = BooleanArray::builder($haystack.len());
+
+        for (i, arr) in $haystack.iter().enumerate() {
+            if arr.is_none() || $needle.is_null(i) {
+                boolean_builder.append_null();
+                continue;
+            }
+            let arr = arr.unwrap();
+            let is_nested = arr.data_type().is_nested();
+            let needle_row = Scalar::new($needle.slice(i, 1));
+            let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?;
+            boolean_builder.append_value(eq_array.true_count() > 0);
+        }
+
+        Ok(Arc::new(boolean_builder.finish()))
+    }};
+}
+
+fn array_has_dispatch_for_fixed_size_list_array(
+    haystack: &ArrayRef,
+    needle: &ArrayRef,
+) -> Result<ArrayRef> {
+    let haystack = as_fixed_size_list_array(haystack);
+    array_has_dispatch_for_array!(haystack, needle)
+}
+
 fn array_has_dispatch_for_array<O: OffsetSizeTrait>(
     haystack: &ArrayRef,
     needle: &ArrayRef,
 ) -> Result<ArrayRef> {
     let haystack = as_generic_list_array::<O>(haystack)?;
-    let mut boolean_builder = BooleanArray::builder(haystack.len());
+    array_has_dispatch_for_array!(haystack, needle)
+}
 
-    for (i, arr) in haystack.iter().enumerate() {
-        if arr.is_none() || needle.is_null(i) {
-            boolean_builder.append_null();
-            continue;
+macro_rules! array_has_dispatch_for_scalar {
+    ($haystack:expr, $offsets:expr, $needle:expr) => {{
+        let values = $haystack.values();
+        let is_nested = values.data_type().is_nested();
+        // If first argument is empty list (second argument is non-null), 
return false
+        // i.e. array_has([], non-null element) -> false
+        if values.is_empty() {
+            return Ok(Arc::new(BooleanArray::new(
+                BooleanBuffer::new_unset($haystack.len()),
+                None,
+            )));
+        }
+        let eq_array = compare_with_eq(values, $needle, is_nested)?;
+        let mut final_contained = vec![None; $haystack.len()];
+        for (i, offset) in $offsets.windows(2).enumerate() {
+            let start = offset[0].to_usize().unwrap();
+            let end = offset[1].to_usize().unwrap();
+            let length = end - start;
+            // For non-nested list, length is 0 for null
+            if length == 0 {
+                continue;
+            }
+            let sliced_array = eq_array.slice(start, length);
+            final_contained[i] = Some(sliced_array.true_count() > 0);
         }
-        let arr = arr.unwrap();
-        let is_nested = arr.data_type().is_nested();
-        let needle_row = Scalar::new(needle.slice(i, 1));
-        let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?;
-        boolean_builder.append_value(eq_array.true_count() > 0);
-    }
 
-    Ok(Arc::new(boolean_builder.finish()))
+        Ok(Arc::new(BooleanArray::from(final_contained)))
+    }};
+}
+
+fn array_has_dispatch_for_fixed_size_list_scalar(
+    haystack: &ArrayRef,
+    needle: &dyn Datum,
+) -> Result<ArrayRef> {
+    let haystack = as_fixed_size_list_array(haystack);
+    let value_length = haystack.value_length() as usize;
+    let offsets = (0_i32..haystack.len() as i32)
+        .step_by(value_length)
+        .collect::<Vec<_>>();
+    array_has_dispatch_for_scalar!(haystack, offsets, needle)
 }
 
 fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
     haystack: &ArrayRef,
     needle: &dyn Datum,
 ) -> Result<ArrayRef> {
     let haystack = as_generic_list_array::<O>(haystack)?;
-    let values = haystack.values();
-    let is_nested = values.data_type().is_nested();
     let offsets = haystack.value_offsets();
-    // If first argument is empty list (second argument is non-null), return 
false
-    // i.e. array_has([], non-null element) -> false
-    if values.is_empty() {
-        return Ok(Arc::new(BooleanArray::new(
-            BooleanBuffer::new_unset(haystack.len()),
-            None,
-        )));
-    }
-    let eq_array = compare_with_eq(values, needle, is_nested)?;
-    let mut final_contained = vec![None; haystack.len()];
-    for (i, offset) in offsets.windows(2).enumerate() {
-        let start = offset[0].to_usize().unwrap();
-        let end = offset[1].to_usize().unwrap();
-        let length = end - start;
-        // For non-nested list, length is 0 for null
-        if length == 0 {
-            continue;
+    array_has_dispatch_for_scalar!(haystack, offsets, needle)
+}
+
+fn array_has_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+    array_has_all_and_any_inner(args, ComparisonType::All)
+}
+
+// General row comparison for array_has_all and array_has_any
+macro_rules! general_array_has_for_all_and_any {
+    ($haystack:expr, $needle:expr, $comparison_type:expr) => {{
+        let mut boolean_builder = BooleanArray::builder($haystack.len());
+        let converter = 
RowConverter::new(vec![SortField::new($haystack.value_type())])?;
+
+        for (arr, sub_arr) in $haystack.iter().zip($needle.iter()) {
+            if let (Some(arr), Some(sub_arr)) = (arr, sub_arr) {
+                let arr_values = converter.convert_columns(&[arr])?;
+                let sub_arr_values = converter.convert_columns(&[sub_arr])?;
+                
boolean_builder.append_value(general_array_has_all_and_any_kernel(
+                    arr_values,
+                    sub_arr_values,
+                    $comparison_type,
+                ));
+            } else {
+                boolean_builder.append_null();
+            }
         }
-        let sliced_array = eq_array.slice(start, length);
-        final_contained[i] = Some(sliced_array.true_count() > 0);
-    }
 
-    Ok(Arc::new(BooleanArray::from(final_contained)))
+        Ok(Arc::new(boolean_builder.finish()))
+    }};
 }
 
-fn array_has_all_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args[0].data_type() {
-        DataType::List(_) => {
-            array_has_all_and_any_dispatch::<i32>(&args[0], &args[1], 
ComparisonType::All)
+// String comparison for array_has_all and array_has_any
+macro_rules! array_has_all_and_any_string_internal {
+    ($haystack:expr, $needle:expr, $comparison_type:expr) => {{
+        let mut boolean_builder = BooleanArray::builder($haystack.len());
+        for (arr, sub_arr) in $haystack.iter().zip($needle.iter()) {
+            match (arr, sub_arr) {
+                (Some(arr), Some(sub_arr)) => {
+                    let haystack_array = string_array_to_vec(&arr);
+                    let needle_array = string_array_to_vec(&sub_arr);
+                    boolean_builder.append_value(array_has_string_kernel(
+                        haystack_array,
+                        needle_array,
+                        $comparison_type,
+                    ));
+                }
+                (_, _) => {
+                    boolean_builder.append_null();
+                }
+            }
         }
-        DataType::LargeList(_) => {
-            array_has_all_and_any_dispatch::<i64>(&args[0], &args[1], 
ComparisonType::All)
+
+        Ok(Arc::new(boolean_builder.finish()))
+    }};
+}
+
+macro_rules! array_has_all_and_any_dispatch {
+    ($haystack:expr, $needle:expr, $comparison_type:expr) => {
+        if $needle.values().is_empty() {
+            let buffer = match $comparison_type {
+                ComparisonType::All => BooleanBuffer::new_set($haystack.len()),
+                ComparisonType::Any => 
BooleanBuffer::new_unset($haystack.len()),
+            };
+            Ok(Arc::new(BooleanArray::from(buffer)))
+        } else {
+            match $needle.data_type() {
+                DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
+                    array_has_all_and_any_string_internal!(
+                        $haystack,
+                        $needle,
+                        $comparison_type
+                    )
+                }
+                _ => general_array_has_for_all_and_any!(
+                    $haystack,
+                    $needle,
+                    $comparison_type
+                ),
+            }
         }
-        _ => exec_err!(
-            "array_has does not support type '{:?}'.",
-            args[0].data_type()
-        ),
-    }
+    };
 }
 
-fn array_has_any_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
-    match args[0].data_type() {
-        DataType::List(_) => {
-            array_has_all_and_any_dispatch::<i32>(&args[0], &args[1], 
ComparisonType::Any)
+fn array_has_all_and_any_inner(
+    args: &[ArrayRef],
+    comparison_type: ComparisonType,
+) -> Result<ArrayRef> {
+    match (args[0].data_type(), args[1].data_type()) {

Review Comment:
   This will generate quite a few different copies of the code (code bloat) -- 
and I am not sure how important it is to create a specialized version of each 
type of comparison
   
   Would it be possible instead to update the coercion logic so the argments 
are always cast to the same type  (it might already be this way, I didn't 
double check) before `array_has_all` is called?



##########
datafusion/functions-nested/src/array_has.rs:
##########
@@ -232,98 +236,244 @@ fn array_has_inner_for_array(haystack: &ArrayRef, 
needle: &ArrayRef) -> Result<A
     match haystack.data_type() {
         DataType::List(_) => array_has_dispatch_for_array::<i32>(haystack, 
needle),
         DataType::LargeList(_) => 
array_has_dispatch_for_array::<i64>(haystack, needle),
+        DataType::FixedSizeList(_, _) => {
+            array_has_dispatch_for_fixed_size_list_array(haystack, needle)
+        }
         _ => exec_err!(
             "array_has does not support type '{:?}'.",
             haystack.data_type()
         ),
     }
 }
 
+macro_rules! array_has_dispatch_for_array {

Review Comment:
   I find macros a bit harder to deal with than normal functions
   
   Can you please add some documentation to these macros explaining what they 
do (and in particular, what the expectations on `$haystack` and `$needle` are)? 
Specifically, they look like they are supposed to be an array and scalar 
perhaps?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to