This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 03d17e8473 Improve documentation for `AggregateUdfImpl::simplify` and
`WindowUDFImpl::simplify` (#20712)
03d17e8473 is described below
commit 03d17e84737625df743f5a4755bd6583f4d75119
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Mar 5 16:14:15 2026 -0500
Improve documentation for `AggregateUdfImpl::simplify` and
`WindowUDFImpl::simplify` (#20712)
## Rationale for this change
While working on https://github.com/apache/datafusion/pull/20665 I was
somewhat confused about how the AggregateUDF simplify worked. So I added
some more clarifying comments
## What changes are included in this PR?
1. Improve documentation strings
## Are these changes tested?
Yes by CI
## Are there any user-facing changes?
Better API docs
---------
Co-authored-by: Yongting You <[email protected]>
---
datafusion/expr/src/expr.rs | 4 ++--
datafusion/expr/src/function.rs | 40 ++++++++++++++++++----------------------
datafusion/expr/src/udaf.rs | 36 ++++++++++++++++++++++--------------
datafusion/expr/src/udf.rs | 2 +-
datafusion/expr/src/udwf.rs | 27 +++++++++++++++------------
5 files changed, 58 insertions(+), 51 deletions(-)
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index ef0187a878..5c6acd480e 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1010,7 +1010,7 @@ impl WindowFunctionDefinition {
}
}
- /// Return the inner window simplification function, if any
+ /// Returns this window function's simplification hook, if any.
///
/// See [`WindowFunctionSimplification`] for more information
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -1097,7 +1097,7 @@ impl WindowFunction {
}
}
- /// Return the inner window simplification function, if any
+ /// Returns this window function's simplification hook, if any.
///
/// See [`WindowFunctionSimplification`] for more information
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index 68d2c90732..68865cbe1c 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{
AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs,
};
+use crate::expr::{AggregateFunction, WindowFunction};
+use crate::simplify::SimplifyContext;
pub use datafusion_functions_window_common::expr::ExpressionArgs;
pub use datafusion_functions_window_common::field::WindowUDFFieldArgs;
pub use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
@@ -64,28 +66,22 @@ pub type PartitionEvaluatorFactory =
pub type StateTypeFunction =
Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>;
-/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure
-/// A closure with two arguments:
-/// * 'aggregate_function': [crate::expr::AggregateFunction] for which
simplified has been invoked
-/// * 'info': [crate::simplify::SimplifyContext]
+/// Type alias for [crate::udaf::AggregateUDFImpl::simplify].
///
-/// Closure returns simplified [Expr] or an error.
-pub type AggregateFunctionSimplification = Box<
- dyn Fn(
- crate::expr::AggregateFunction,
- &crate::simplify::SimplifyContext,
- ) -> Result<Expr>,
->;
+/// This closure is invoked with:
+/// * `aggregate_function`: [AggregateFunction] with already simplified
arguments
+/// * `info`: [SimplifyContext]
+///
+/// It returns a simplified [Expr] or an error.
+pub type AggregateFunctionSimplification =
+ Box<dyn Fn(AggregateFunction, &SimplifyContext) -> Result<Expr>>;
-/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure
-/// A closure with two arguments:
-/// * 'window_function': [crate::expr::WindowFunction] for which simplified
has been invoked
-/// * 'info': [crate::simplify::SimplifyContext]
+/// Type alias for [crate::udwf::WindowUDFImpl::simplify].
+///
+/// This closure is invoked with:
+/// * `window_function`: [WindowFunction] with already simplified arguments
+/// * `info`: [SimplifyContext]
///
-/// Closure returns simplified [Expr] or an error.
-pub type WindowFunctionSimplification = Box<
- dyn Fn(
- crate::expr::WindowFunction,
- &crate::simplify::SimplifyContext,
- ) -> Result<Expr>,
->;
+/// It returns a simplified [Expr] or an error.
+pub type WindowFunctionSimplification =
+ Box<dyn Fn(WindowFunction, &SimplifyContext) -> Result<Expr>>;
diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index ee38077dbf..f2e2c53cdb 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -294,7 +294,7 @@ impl AggregateUDF {
self.inner.reverse_expr()
}
- /// Do the function rewrite
+ /// Returns this aggregate function's simplification hook, if any.
///
/// See [`AggregateUDFImpl::simplify`] for more details.
pub fn simplify(&self) -> Option<AggregateFunctionSimplification> {
@@ -651,26 +651,34 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash +
Send + Sync {
AggregateOrderSensitivity::HardRequirement
}
- /// Optionally apply per-UDaF simplification / rewrite rules.
+ /// Returns an optional hook for simplifying this user-defined aggregate.
///
- /// This can be used to apply function specific simplification rules during
- /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
- /// implementation does nothing.
+ /// Use this hook to apply function-specific rewrites during optimization.
+ /// The default implementation returns `None`.
///
- /// Note that DataFusion handles simplifying arguments and "constant
- /// folding" (replacing a function call with constant arguments such as
- /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
- /// optimizations manually for specific UDFs.
+ /// For example, `percentile_cont(x, 0.0)` and `percentile_cont(x, 1.0)`
can
+ /// be rewritten to `MIN(x)` or `MAX(x)` depending on the `ORDER BY`
+ /// direction.
+ ///
+ /// DataFusion already simplifies arguments and performs constant folding
+ /// (for example, `my_add(1, 2) -> 3`). For nested expressions, the
optimizer
+ /// runs simplification in multiple passes, so arguments are typically
+ /// simplified before this hook is invoked. As a result, UDF
implementations
+ /// usually do not need to handle argument simplification themselves.
+ ///
+ /// See configuration `datafusion.optimizer.max_passes` for details on how
many
+ /// optimization passes may be applied.
///
/// # Returns
///
- /// [None] if simplify is not defined or,
+ /// `None` if simplify is not defined.
///
- /// Or, a closure with two arguments:
- /// * 'aggregate_function': [AggregateFunction] for which simplified has
been invoked
- /// * 'info': [crate::simplify::SimplifyContext]
+ /// Or, a closure ([`AggregateFunctionSimplification`]) invoked with:
+ /// * `aggregate_function`: [AggregateFunction] with already simplified
+ /// arguments
+ /// * `info`: [crate::simplify::SimplifyContext]
///
- /// closure returns simplified [Expr] or an error.
+ /// The closure returns a simplified [Expr] or an error.
///
/// # Notes
///
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 9705006e41..9286dd30b1 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -217,7 +217,7 @@ impl ScalarUDF {
self.inner.return_field_from_args(args)
}
- /// Do the function rewrite
+ /// Returns this scalar function's simplification result.
///
/// See [`ScalarUDFImpl::simplify`] for more details.
pub fn simplify(
diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs
index 8f2b8a0d9b..7f86a69f87 100644
--- a/datafusion/expr/src/udwf.rs
+++ b/datafusion/expr/src/udwf.rs
@@ -157,7 +157,7 @@ impl WindowUDF {
self.inner.signature()
}
- /// Do the function rewrite
+ /// Returns this window function's simplification hook, if any.
///
/// See [`WindowUDFImpl::simplify`] for more details.
pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -344,25 +344,28 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send +
Sync {
partition_evaluator_args: PartitionEvaluatorArgs,
) -> Result<Box<dyn PartitionEvaluator>>;
- /// Optionally apply per-UDWF simplification / rewrite rules.
+ /// Returns an optional hook for simplifying this user-defined window
+ /// function.
///
- /// This can be used to apply function specific simplification rules during
- /// optimization. The default implementation does nothing.
+ /// Use this hook to apply function-specific rewrites during optimization.
+ /// The default implementation returns `None`.
///
- /// Note that DataFusion handles simplifying arguments and "constant
- /// folding" (replacing a function call with constant arguments such as
- /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
- /// optimizations manually for specific UDFs.
+ /// DataFusion already simplifies arguments and performs constant folding
+ /// (for example, `my_add(1, 2) -> 3`), so there is usually no need to
+ /// implement those optimizations manually for specific UDFs.
///
/// Example:
/// `advanced_udwf.rs`:
<https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/udf/advanced_udwf.rs>
///
/// # Returns
- /// [None] if simplify is not defined or,
+ /// `None` if simplify is not defined.
///
- /// Or, a closure with two arguments:
- /// * 'window_function': [crate::expr::WindowFunction] for which
simplified has been invoked
- /// * 'info': [crate::simplify::SimplifyContext]
+ /// Or, a closure ([`WindowFunctionSimplification`]) invoked with:
+ /// * `window_function`: [WindowFunction] with already simplified
+ /// arguments
+ /// * `info`: [crate::simplify::SimplifyContext]
+ ///
+ /// The closure returns a simplified [Expr] or an error.
///
/// # Notes
/// The returned expression must have the same schema as the original
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]