findepi commented on code in PR #17684:
URL: https://github.com/apache/datafusion/pull/17684#discussion_r2368740132


##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,18 +371,46 @@ pub(crate) fn window_equivalence_properties(
     for (i, expr) in window_exprs.iter().enumerate() {
         let partitioning_exprs = expr.partition_by();
         let no_partitioning = partitioning_exprs.is_empty();
-        // Collect columns defining partitioning, and construct all 
`SortOptions`
-        // variations for them. Then, we will check each one whether it 
satisfies
-        // the existing ordering provided by the input plan.
+
+        // Use incremental approach to avoid O(4^n) exponential complexity:
+        // Instead of generating all combinations upfront via 
multi_cartesian_product,
+        // we build orderings incrementally and prune invalid paths early.
         let mut all_satisfied_lexs = vec![];
-        for lex in partitioning_exprs
-            .iter()
-            .map(|pb_order| 
sort_options_resolving_constant(Arc::clone(pb_order)))
-            .multi_cartesian_product()
-            .filter_map(LexOrdering::new)
-        {
-            if window_eq_properties.ordering_satisfy(lex.clone())? {
-                all_satisfied_lexs.push(lex);
+        if !no_partitioning {

Review Comment:
   This condition still looks redundant. IIUC, when 
`partitioning_exprs.is_empty()` the code inside the THEN arm will not do 
anything



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,18 +371,41 @@ pub(crate) fn window_equivalence_properties(
     for (i, expr) in window_exprs.iter().enumerate() {
         let partitioning_exprs = expr.partition_by();
         let no_partitioning = partitioning_exprs.is_empty();
-        // Collect columns defining partitioning, and construct all 
`SortOptions`
-        // variations for them. Then, we will check each one whether it 
satisfies
-        // the existing ordering provided by the input plan.
+
+        // Find "one" valid ordering for partition columns to avoid 
exponential complexity.
         let mut all_satisfied_lexs = vec![];
-        for lex in partitioning_exprs
-            .iter()
-            .map(|pb_order| 
sort_options_resolving_constant(Arc::clone(pb_order)))
-            .multi_cartesian_product()
-            .filter_map(LexOrdering::new)
-        {
-            if window_eq_properties.ordering_satisfy(lex.clone())? {
-                all_satisfied_lexs.push(lex);
+        if !no_partitioning {
+            // Find a single valid ordering using a greedy approach
+            let mut ordering = vec![];
+            for partition_expr in partitioning_exprs.iter() {
+                let sort_options =
+                    
sort_options_resolving_constant(Arc::clone(partition_expr), true);
+
+                // Try each sort option and pick the first one that works
+                let mut found = false;
+                for sort_expr in sort_options.iter() {
+                    let mut candidate_ordering = ordering.clone();
+                    candidate_ordering.push(sort_expr.clone());

Review Comment:
   nit:
   
   `ordering.clone()` is avoidable. 
   logically, it's an expression being pushed-tried-else-popped:
   
   ```rust
   // Find a single valid ordering using a greedy approach
   let mut candidate_ordering = vec![];
   for partition_expr in partitioning_exprs.iter() {
       let sort_options =
           sort_options_resolving_constant(Arc::clone(partition_expr), true);
   
       // Try each sort option and pick the first one that works
       let mut found = false;
       for sort_expr in sort_options.iter() {
           candidate_ordering.push(sort_expr.clone());
   
           if let Some(lex) = LexOrdering::new(candidate_ordering.clone()) {
               if window_eq_properties.ordering_satisfy(lex)? {
                   found = true;
                   break;
               }
           } else {
               candidate_ordering.pop();
           }
       }
       // If no sort option works for this column, we can't build a valid 
ordering
       if !found {
           candidate_ordering.clear();
           break;
       }
   }
   // If we successfully built an ordering for all columns, use it
   if candidate_ordering.len() == partitioning_exprs.len() {
       if let Some(lex) = LexOrdering::new(ordering) {
           all_satisfied_lexs.push(lex);
       }
   }
   ```



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -471,7 +501,7 @@ pub(crate) fn window_equivalence_properties(
                         .get_aggregate_expr()
                         .expressions()
                         .into_iter()
-                        .map(sort_options_resolving_constant)
+                        .map(|expr| sort_options_resolving_constant(expr, 
false))
                         .multi_cartesian_product();

Review Comment:
   let's maybe file issue / follow-up PR, so that we don't need to re-review 
code unnecessarily



##########
datafusion/physical-plan/src/windows/mod.rs:
##########
@@ -371,18 +371,41 @@ pub(crate) fn window_equivalence_properties(
     for (i, expr) in window_exprs.iter().enumerate() {
         let partitioning_exprs = expr.partition_by();
         let no_partitioning = partitioning_exprs.is_empty();
-        // Collect columns defining partitioning, and construct all 
`SortOptions`
-        // variations for them. Then, we will check each one whether it 
satisfies
-        // the existing ordering provided by the input plan.
+
+        // Find "one" valid ordering for partition columns to avoid 
exponential complexity.
         let mut all_satisfied_lexs = vec![];
-        for lex in partitioning_exprs
-            .iter()
-            .map(|pb_order| 
sort_options_resolving_constant(Arc::clone(pb_order)))
-            .multi_cartesian_product()
-            .filter_map(LexOrdering::new)
-        {
-            if window_eq_properties.ordering_satisfy(lex.clone())? {
-                all_satisfied_lexs.push(lex);
+        if !no_partitioning {
+            // Find a single valid ordering using a greedy approach
+            let mut ordering = vec![];
+            for partition_expr in partitioning_exprs.iter() {
+                let sort_options =
+                    
sort_options_resolving_constant(Arc::clone(partition_expr), true);
+
+                // Try each sort option and pick the first one that works
+                let mut found = false;
+                for sort_expr in sort_options.iter() {
+                    let mut candidate_ordering = ordering.clone();
+                    candidate_ordering.push(sort_expr.clone());
+
+                    if let Some(lex) = 
LexOrdering::new(candidate_ordering.clone()) {
+                        if window_eq_properties.ordering_satisfy(lex)? {

Review Comment:
   With every new window sort expression iterated over, we seem to be 
processing all previous sort expressions. This feels quadratic. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to