findepi commented on code in PR #17684:
URL: https://github.com/apache/datafusion/pull/17684#discussion_r2372957164


##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -467,23 +493,44 @@ pub(crate) fn window_equivalence_properties(
                 // utilize set-monotonicity since the set shrinks as the frame
                 // boundary starts "touching" the end of the table.
                 else if frame.is_causal() {
-                    let args_all_lexs = sliding_expr
-                        .get_aggregate_expr()
-                        .expressions()
-                        .into_iter()
-                        .map(sort_options_resolving_constant)
-                        .multi_cartesian_product();
-
-                    let (mut asc, mut satisfied) = (false, false);
-                    for order in args_all_lexs {
-                        if let Some(f) = order.first() {
-                            asc = !f.options.descending;
+                    // Find one valid ordering for aggregate arguments instead 
of
+                    // checking all combinations
+                    let aggregate_exprs = 
sliding_expr.get_aggregate_expr().expressions();
+                    let mut candidate_order = vec![];
+                    let mut asc = false;
+
+                    for (idx, expr) in aggregate_exprs.iter().enumerate() {
+                        let mut found = false;
+                        let sort_options =
+                            sort_options_resolving_constant(Arc::clone(expr), 
false);
+
+                        // Try each option and pick the first that works
+                        for sort_expr in sort_options.iter() {
+                            candidate_order.push(sort_expr.clone());
+
+                            if let Some(lex) = 
LexOrdering::new(candidate_order.clone()) {
+                                if window_eq_properties.ordering_satisfy(lex)? 
{
+                                    if idx == 0 {
+                                        asc = !sort_expr.options.descending;

Review Comment:
   why first is special? worth a code comment 
   
   (i know the logic is pre-existing and did not have a comment, but you seem 
to know what this is about and I do not)



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -634,11 +681,45 @@ pub fn get_window_mode(
     Ok(None)
 }
 
-fn sort_options_resolving_constant(expr: Arc<dyn PhysicalExpr>) -> 
Vec<PhysicalSortExpr> {
-    vec![
-        PhysicalSortExpr::new(Arc::clone(&expr), SortOptions::new(false, 
false)),
-        PhysicalSortExpr::new(expr, SortOptions::new(true, true)),
-    ]
+/// Generates sort option variations for a given expression.
+///
+/// This function is used to handle constant columns in window operations. 
Since constant
+/// columns can be considered as having any ordering, we generate multiple 
sort options
+/// to explore different ordering possibilities.
+///
+/// # Parameters
+/// - `expr`: The physical expression to generate sort options for
+/// - `all_options`: If true, generates all 4 possible sort options (ASC/DESC 
× NULLS FIRST/LAST).
+///   If false, generates only 2 options that preserve set monotonicity.
+///
+/// # When to use `all_options = true`:
+/// Use for PARTITION BY columns where we want to explore all possible 
orderings to find
+/// one that matches the existing data ordering.
+///
+/// # When to use `all_options = false`:
+/// Use for aggregate/window function arguments where set monotonicity needs 
to be preserved.
+/// Only generates ASC NULLS LAST and DESC NULLS FIRST because:
+/// - Set monotonicity is broken if data has increasing order but nulls come 
first
+/// - Set monotonicity is broken if data has decreasing order but nulls come 
last

Review Comment:
   Maybe `only_monotonic` could be a more descriptive param name?
   



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,17 +371,41 @@ pub(crate) fn window_equivalence_properties(
     for (i, expr) in window_exprs.iter().enumerate() {
         let partitioning_exprs = expr.partition_by();
         let no_partitioning = partitioning_exprs.is_empty();
-        // Collect columns defining partitioning, and construct all 
`SortOptions`
-        // variations for them. Then, we will check each one whether it 
satisfies
-        // the existing ordering provided by the input plan.
+
+        // Find "one" valid ordering for partition columns to avoid 
exponential complexity.
         let mut all_satisfied_lexs = vec![];
-        for lex in partitioning_exprs
-            .iter()
-            .map(|pb_order| 
sort_options_resolving_constant(Arc::clone(pb_order)))
-            .multi_cartesian_product()
-            .filter_map(LexOrdering::new)
+        let mut candidate_ordering = vec![];
+
+        for partition_expr in partitioning_exprs.iter() {
+            let sort_options =
+                sort_options_resolving_constant(Arc::clone(partition_expr), 
true);
+
+            // Try each sort option and pick the first one that works
+            let mut found = false;
+            for sort_expr in sort_options.iter() {
+                candidate_ordering.push(sort_expr.clone());
+                if let Some(lex) = 
LexOrdering::new(candidate_ordering.clone()) {
+                    if window_eq_properties.ordering_satisfy(lex)? {
+                        found = true;
+                        break;
+                    }
+                }
+                // This option didn't work, remove it and try the next one
+                candidate_ordering.pop();
+            }
+            // If no sort option works for this column, we can't build a valid 
ordering
+            if !found {
+                candidate_ordering.clear();
+                break;
+            }
+        }
+
+        // If we successfully built an ordering for all columns, use it
+        // When there are no partition expressions, candidate_ordering will be 
empty and won't be added
+        if candidate_ordering.len() == partitioning_exprs.len()
+            && !candidate_ordering.is_empty()

Review Comment:
   `&& !candidate_ordering.is_empty()` is redundant.
   in the empty case, the `LexOrdering::new` returns `None`
   
   ```suggestion
   ```



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -467,23 +493,44 @@ pub(crate) fn window_equivalence_properties(
                 // utilize set-monotonicity since the set shrinks as the frame
                 // boundary starts "touching" the end of the table.
                 else if frame.is_causal() {
-                    let args_all_lexs = sliding_expr
-                        .get_aggregate_expr()
-                        .expressions()
-                        .into_iter()
-                        .map(sort_options_resolving_constant)
-                        .multi_cartesian_product();

Review Comment:
   Thanks for solving this exponential case too!
   
   I am not sure the regression tests cover this case.
   Do we need some more test queries?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to