alamb commented on code in PR #15504:
URL: https://github.com/apache/datafusion/pull/15504#discussion_r2023660049


##########
datafusion/physical-expr/src/aggregate.rs:
##########
@@ -97,6 +97,165 @@ impl AggregateExprBuilder {
     /// Constructs an `AggregateFunctionExpr` from the builder
     ///
     /// Note that an [`Self::alias`] must be provided before calling this 
method.
+    ///
+    /// # Example: Create an [`AggregateUDF`]
+    ///
+    /// In the following example, [`AggregateFunctionExpr`] will be built 
using [`AggregateExprBuilder`]
+    /// which provides a build function. Full example could be accessed from 
the source file.
+    ///
+    /// ```
+    /// # use std::any::Any;
+    /// # use std::sync::OnceLock;
+    /// # use std::sync::Arc;
+    /// # use arrow::datatypes::DataType;
+    /// # use datafusion_common::{DataFusionError, plan_err, Result, 
ScalarValue};
+    /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, 
Expr, Documentation};
+    /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator, 
function::{AccumulatorArgs, StateFieldsArgs}};
+    /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE;
+    /// # use arrow::datatypes::Schema;
+    /// # use arrow::datatypes::Field;
+    /// # use arrow::array::Array;
+    /// #
+    /// # #[derive(Debug)]
+    /// # struct FirstValueAccumulator {
+    /// #     value: Option<ScalarValue>,
+    /// #     data_type: DataType,
+    /// # }
+    /// #
+    /// # impl Accumulator for FirstValueAccumulator {
+    /// #     fn update_batch(&mut self, values: &[Arc<dyn Array>]) -> 
Result<()> {

Review Comment:
   I think we can significantly shorten the example by just using 
`unimplemented!()` instead of adding an actual implementation for the 
accumulator and its methods.



##########
datafusion/physical-expr/src/aggregate.rs:
##########
@@ -97,6 +97,165 @@ impl AggregateExprBuilder {
     /// Constructs an `AggregateFunctionExpr` from the builder
     ///
     /// Note that an [`Self::alias`] must be provided before calling this 
method.
+    ///
+    /// # Example: Create an [`AggregateUDF`]
+    ///
+    /// In the following example, [`AggregateFunctionExpr`] will be built 
using [`AggregateExprBuilder`]
+    /// which provides a build function. Full example could be accessed from 
the source file.
+    ///
+    /// ```
+    /// # use std::any::Any;
+    /// # use std::sync::OnceLock;
+    /// # use std::sync::Arc;
+    /// # use arrow::datatypes::DataType;
+    /// # use datafusion_common::{DataFusionError, plan_err, Result, 
ScalarValue};
+    /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, 
Expr, Documentation};
+    /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator, 
function::{AccumulatorArgs, StateFieldsArgs}};
+    /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE;
+    /// # use arrow::datatypes::Schema;
+    /// # use arrow::datatypes::Field;
+    /// # use arrow::array::Array;
+    /// #
+    /// # #[derive(Debug)]
+    /// # struct FirstValueAccumulator {
+    /// #     value: Option<ScalarValue>,
+    /// #     data_type: DataType,
+    /// # }
+    /// #
+    /// # impl Accumulator for FirstValueAccumulator {
+    /// #     fn update_batch(&mut self, values: &[Arc<dyn Array>]) -> 
Result<()> {
+    /// #         if self.value.is_none() && !values.is_empty() {
+    /// #             let first_array = &values[0];
+    /// #             for i in 0..first_array.len() {
+    /// #                 if !first_array.is_null(i) {
+    /// #                     self.value = 
Some(ScalarValue::try_from_array(first_array, i)?);
+    /// #                     break;
+    /// #                 }
+    /// #             }
+    /// #         }
+    /// #         Ok(())
+    /// #     }
+    /// #
+    /// #     fn merge_batch(&mut self, states: &[Arc<dyn Array>]) -> 
Result<()> {
+    /// #         if self.value.is_none() && !states.is_empty() {
+    /// #             let first_array = &states[0];
+    /// #             for i in 0..first_array.len() {
+    /// #                 if !first_array.is_null(i) {
+    /// #                     self.value = 
Some(ScalarValue::try_from_array(first_array, i)?);
+    /// #                     break;
+    /// #                 }
+    /// #             }
+    /// #         }
+    /// #         Ok(())
+    /// #     }
+    /// #
+    /// #     fn evaluate(&mut self) -> Result<ScalarValue> {
+    /// #         match &self.value {
+    /// #             Some(value) => Ok(value.clone()),
+    /// #             None => ScalarValue::try_from(&self.data_type),
+    /// #         }
+    /// #     }
+    /// #
+    /// #     fn size(&self) -> usize {
+    /// #         std::mem::size_of_val(self)
+    /// #     }
+    /// #
+    /// #     fn state(&mut self) -> Result<Vec<ScalarValue>> {
+    /// #         match &self.value {
+    /// #             Some(value) => Ok(vec![value.clone()]),
+    /// #             None => ScalarValue::try_from(&self.data_type).map(|v| 
vec![v]),
+    /// #         }
+    /// #     }
+    /// # }
+    /// #
+    /// # #[derive(Debug, Clone)]
+    /// # struct FirstValueUdf {
+    /// #     signature: Signature,
+    /// # }
+    /// #
+    /// # impl FirstValueUdf {
+    /// #     fn new() -> Self {
+    /// #         Self {
+    /// #             signature: Signature::any(1, Volatility::Immutable),
+    /// #         }
+    /// #     }
+    /// # }
+    /// #
+    /// # static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+    /// #
+    /// # fn get_doc() -> &'static Documentation {
+    /// #     DOCUMENTATION.get_or_init(|| {
+    /// #         Documentation::builder(
+    /// #             DOC_SECTION_AGGREGATE,
+    /// #             "returns the first value in a set of values",
+    /// #             "first_value(column)"
+    /// #         )
+    /// #         .with_argument("arg1", "The column to get the first value 
from")
+    /// #         .build()
+    /// #     })
+    /// # }
+    /// #
+    /// # impl AggregateUDFImpl for FirstValueUdf {
+    /// #     fn as_any(&self) -> &dyn Any { self }
+    /// #     fn name(&self) -> &str { "first_value" }
+    /// #     fn signature(&self) -> &Signature { &self.signature }
+    /// #     fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+    /// #         Ok(args[0].clone())
+    /// #     }
+    /// #     
+    /// #     fn accumulator(&self, acc_args: AccumulatorArgs) -> 
Result<Box<dyn Accumulator>> {
+    /// #             let input_type = 
acc_args.schema.field(0).data_type().clone();
+    /// #             
+    /// #             Ok(Box::new(FirstValueAccumulator {
+    /// #                 value: None,
+    /// #                 data_type: input_type,
+    /// #             }))
+    /// #         }

Review Comment:
   If you did this (and made similar changes in the rest of the PR), you could 
avoid most of the remaining copy/paste in the example above.
   
   ```suggestion
       /// #     fn accumulator(&self, acc_args: AccumulatorArgs) -> 
Result<Box<dyn Accumulator>> {
       /// #            unimplemented!()
       /// #         }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to