alamb commented on code in PR #15504: URL: https://github.com/apache/datafusion/pull/15504#discussion_r2023660049
########## datafusion/physical-expr/src/aggregate.rs: ########## @@ -97,6 +97,165 @@ impl AggregateExprBuilder { /// Constructs an `AggregateFunctionExpr` from the builder /// /// Note that an [`Self::alias`] must be provided before calling this method. + /// + /// # Example: Create an [`AggregateUDF`] + /// + /// In the following example, [`AggregateFunctionExpr`] will be built using [`AggregateExprBuilder`] + /// which provides a build function. Full example could be accessed from the source file. + /// + /// ``` + /// # use std::any::Any; + /// # use std::sync::OnceLock; + /// # use std::sync::Arc; + /// # use arrow::datatypes::DataType; + /// # use datafusion_common::{DataFusionError, plan_err, Result, ScalarValue}; + /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, Expr, Documentation}; + /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator, function::{AccumulatorArgs, StateFieldsArgs}}; + /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE; + /// # use arrow::datatypes::Schema; + /// # use arrow::datatypes::Field; + /// # use arrow::array::Array; + /// # + /// # #[derive(Debug)] + /// # struct FirstValueAccumulator { + /// # value: Option<ScalarValue>, + /// # data_type: DataType, + /// # } + /// # + /// # impl Accumulator for FirstValueAccumulator { + /// # fn update_batch(&mut self, values: &[Arc<dyn Array>]) -> Result<()> { Review Comment: I think we can significantly shorten the example by just using `unimplemented!()` instead of adding an actual implementation for the accumulator and its methods. ########## datafusion/physical-expr/src/aggregate.rs: ########## @@ -97,6 +97,165 @@ impl AggregateExprBuilder { /// Constructs an `AggregateFunctionExpr` from the builder /// /// Note that an [`Self::alias`] must be provided before calling this method. 
+ /// + /// # Example: Create an [`AggregateUDF`] + /// + /// In the following example, [`AggregateFunctionExpr`] will be built using [`AggregateExprBuilder`] + /// which provides a build function. Full example could be accessed from the source file. + /// + /// ``` + /// # use std::any::Any; + /// # use std::sync::OnceLock; + /// # use std::sync::Arc; + /// # use arrow::datatypes::DataType; + /// # use datafusion_common::{DataFusionError, plan_err, Result, ScalarValue}; + /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, Expr, Documentation}; + /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator, function::{AccumulatorArgs, StateFieldsArgs}}; + /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE; + /// # use arrow::datatypes::Schema; + /// # use arrow::datatypes::Field; + /// # use arrow::array::Array; + /// # + /// # #[derive(Debug)] + /// # struct FirstValueAccumulator { + /// # value: Option<ScalarValue>, + /// # data_type: DataType, + /// # } + /// # + /// # impl Accumulator for FirstValueAccumulator { + /// # fn update_batch(&mut self, values: &[Arc<dyn Array>]) -> Result<()> { + /// # if self.value.is_none() && !values.is_empty() { + /// # let first_array = &values[0]; + /// # for i in 0..first_array.len() { + /// # if !first_array.is_null(i) { + /// # self.value = Some(ScalarValue::try_from_array(first_array, i)?); + /// # break; + /// # } + /// # } + /// # } + /// # Ok(()) + /// # } + /// # + /// # fn merge_batch(&mut self, states: &[Arc<dyn Array>]) -> Result<()> { + /// # if self.value.is_none() && !states.is_empty() { + /// # let first_array = &states[0]; + /// # for i in 0..first_array.len() { + /// # if !first_array.is_null(i) { + /// # self.value = Some(ScalarValue::try_from_array(first_array, i)?); + /// # break; + /// # } + /// # } + /// # } + /// # Ok(()) + /// # } + /// # + /// # fn evaluate(&mut self) -> Result<ScalarValue> { + /// # match &self.value { + /// # Some(value) => 
Ok(value.clone()), + /// # None => ScalarValue::try_from(&self.data_type), + /// # } + /// # } + /// # + /// # fn size(&self) -> usize { + /// # std::mem::size_of_val(self) + /// # } + /// # + /// # fn state(&mut self) -> Result<Vec<ScalarValue>> { + /// # match &self.value { + /// # Some(value) => Ok(vec![value.clone()]), + /// # None => ScalarValue::try_from(&self.data_type).map(|v| vec![v]), + /// # } + /// # } + /// # } + /// # + /// # #[derive(Debug, Clone)] + /// # struct FirstValueUdf { + /// # signature: Signature, + /// # } + /// # + /// # impl FirstValueUdf { + /// # fn new() -> Self { + /// # Self { + /// # signature: Signature::any(1, Volatility::Immutable), + /// # } + /// # } + /// # } + /// # + /// # static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + /// # + /// # fn get_doc() -> &'static Documentation { + /// # DOCUMENTATION.get_or_init(|| { + /// # Documentation::builder( + /// # DOC_SECTION_AGGREGATE, + /// # "returns the first value in a set of values", + /// # "first_value(column)" + /// # ) + /// # .with_argument("arg1", "The column to get the first value from") + /// # .build() + /// # }) + /// # } + /// # + /// # impl AggregateUDFImpl for FirstValueUdf { + /// # fn as_any(&self) -> &dyn Any { self } + /// # fn name(&self) -> &str { "first_value" } + /// # fn signature(&self) -> &Signature { &self.signature } + /// # fn return_type(&self, args: &[DataType]) -> Result<DataType> { + /// # Ok(args[0].clone()) + /// # } + /// # + /// # fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> { + /// # let input_type = acc_args.schema.field(0).data_type().clone(); + /// # + /// # Ok(Box::new(FirstValueAccumulator { + /// # value: None, + /// # data_type: input_type, + /// # })) + /// # } Review Comment: If you did this (and similar things to the rest of the PR) you can avoid most of the rest of the copy/paste in the example above ```suggestion /// # fn accumulator(&self, acc_args: AccumulatorArgs) -> 
Result<Box<dyn Accumulator>> { /// # unimplemented!() /// # } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org