Dandandan commented on code in PR #12329:
URL: https://github.com/apache/datafusion/pull/12329#discussion_r1754591761
##########
datafusion/functions-nested/src/extract.rs:
##########
@@ -687,3 +695,115 @@ where
);
general_array_slice::<O>(array, &from_array, &to_array, None)
}
+
+#[derive(Debug)]
+pub(super) struct ArrayAnyValue {
+ signature: Signature,
+ aliases: Vec<String>,
+}
+
+impl ArrayAnyValue {
+ pub fn new() -> Self {
+ Self {
+ signature: Signature::array(Volatility::Immutable),
+ aliases: vec![String::from("list_any_value")],
+ }
+ }
+}
+
+impl ScalarUDFImpl for ArrayAnyValue {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+ fn name(&self) -> &str {
+ "array_any_value"
+ }
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+ match &arg_types[0] {
+ List(field)
+ | LargeList(field)
+ | FixedSizeList(field, _) => Ok(field.data_type().clone()),
+ _ => plan_err!(
+ "array_any_value can only accept List, LargeList or
FixedSizeList as the argument"
+ ),
+ }
+ }
+
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+ make_scalar_function(array_any_value_inner)(args)
+ }
+ fn aliases(&self) -> &[String] {
+ &self.aliases
+ }
+}
+
+fn array_any_value_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("array_any_value expects one argument");
+ }
+
+ match &args[0].data_type() {
+ List(_) => {
+ let array = as_list_array(&args[0])?;
+ general_array_any_value::<i32>(array)
+ }
+ LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ general_array_any_value::<i64>(array)
+ }
+ data_type => exec_err!("array_any_value does not support type: {:?}",
data_type),
+ }
+}
+
+fn general_array_any_value<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+) -> Result<ArrayRef>
+where
+ i64: TryInto<O>,
+{
+ let values = array.values();
+ let original_data = values.to_data();
+ let capacity = Capacities::Array(array.len());
+
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], true,
capacity);
+
+ for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
+ let start = offset_window[0];
+ let end = offset_window[1];
+ let len = end - start;
+
+ // array is null
+ if len == O::usize_as(0) {
+ mutable.extend_nulls(1);
+ continue;
+ }
+
+ let row_value = array.value(row_index);
+ match row_value.nulls() {
+ Some(row_nulls_buffer) => {
+ // nulls are present in the array so try to take the first
valid element
+ if let Some(first_non_null_index) =
Review Comment:
I think, based on `valid_indices`, it should be the first valid index, no?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]