Changeset: c4a78e109706 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c4a78e109706
Modified Files:
        sql/server/rel_optimizer.c
Branch: select-window-pushdown
Log Message:

Optimize filter push down for window functions

This change optimizes the push down of filters to the source tables of 
aggregation window functions, which can lead to significant improvements in 
performance (see issue #7301).
The filters are only pushed if they completely filter out entire groups. If 
they only filter out rows of a group, they cannot be safely pushed down.


diffs (142 lines):

diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -4457,6 +4457,89 @@ rel_push_groupby_down(visitor *v, sql_re
        return rel;
 }
 
+/* 
+ * Gets the column expressions of a diff function and adds them to "columns".
+ * The diff function has two possible argument types: either a sql_exp 
representing a column
+ * or a sql_exp representing another diff function, therefore this function is 
recursive.
+ */
+static void
+get_diff_function_columns(sql_exp *diffExp, list *columns) {
+       list *args = diffExp->l;
+       
+       for (node *arg = args->h; arg; arg = arg->next) {
+               sql_exp *exp = arg->data;
+               
+               // diff function
+               if (exp->type == e_func) {
+                       get_diff_function_columns(exp, columns);
+               }
+               // column
+               else {
+                       list_append(columns, exp);
+               }
+       }
+}
+
+/* 
+ * Builds a list of aggregation key columns to be used by the select push down 
algorithm, namely for
+ * window functions. Returns NULL if the window function does not partition by 
any column
+ */
+static list *
+get_aggregation_key_columns(sql_allocator *sa, sql_rel *r) {
+       for (node* n = r->exps->h; n; n = n->next) {
+               sql_exp *e = n->data;
+               
+               if (e->type == e_func) {
+                       sql_subfunc *f = e->f;
+                       
+                       // aggregation function
+                       if (!strcmp(f->func->base.name, "rank")) {
+                               list* rankArguments = e->l;
+                               // the partition key is the second argument
+                               sql_exp *partitionExp = 
rankArguments->h->next->data;
+
+                               // check if the key contains any columns, i.e., 
is a diff function
+                               if (partitionExp->type == e_func) {
+                                       // get columns to list
+                                       list *aggColumns = sa_list(sa);
+                                       get_diff_function_columns(partitionExp, 
aggColumns);
+                                       return aggColumns;
+                               }
+                               // the function has no aggregation columns 
(e_atom of boolean)
+                               else {
+                                       return NULL;
+                               }
+
+                       }
+               }
+       }
+       return NULL;
+}
+
+/*
+ * Checks if a filter column is also used as an aggregation key, so it can be 
later safely pushed down.
+ */
+static int
+filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) {
+       char *tableName = column->l;
+       char *columnName = column->r;
+
+       for (node *n = aggColumns->h; n; n = n->next) {
+               sql_exp *aggCol = n->data;
+               char *aggColTableName = aggCol->l;
+               char *aggColColumnName = aggCol->r;
+               
+               if (!strcmp(tableName, aggColTableName) && !strcmp(columnName, 
aggColColumnName)) {
+                       /* match */
+                       return 1;
+               }
+       }
+
+       /* no matches found */
+       return 0;
+}
+
+
 /*
  * Push select down, pushes the selects through (simple) projections. Also
  * it cleans up the projections which become useless.
@@ -4605,6 +4688,48 @@ rel_push_select_down(visitor *v, sql_rel
                                n = next;
                        }
                }
+               
+               /* push filters if they match the aggregation key on a window 
function */
+               else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, 
0)) {
+                       set_processed(pl);
+                       /* list of aggregation key columns */
+                       list *aggColumns = 
get_aggregation_key_columns(v->sql->sa, r);
+
+                       /* aggregation keys found, check if any filter matches 
them */
+                       if (aggColumns) {
+                               for (n = exps->h; n;) {
+                                       node *next = n->next;
+                                       sql_exp *e = n->data, *ne = NULL;
+                                       
+                                       if (e->type == e_cmp) {
+                                               /* simple comparison filter */
+                                               if (e->flag == cmp_gt || 
e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
+                                                       || e->flag == cmp_equal 
|| e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin) {
+                                                       sql_exp* column = e->l;
+
+                                                       /* check if the 
expression matches any aggregation key, meaning we can 
+                                                          try to safely push 
it down */
+                                                       if 
(filter_column_in_aggregation_columns(column, aggColumns)) {
+                                                               ne = 
exp_push_down_prj(v->sql, e, r, pl);
+
+                                                               /* can we move 
it down */
+                                                               if (ne && ne != 
e && pl->exps) {
+                                                                       if 
(!is_select(pl->op) || rel_is_ref(pl))
+                                                                               
r->l = pl = rel_select(v->sql->sa, pl, NULL);
+                                                                       
rel_select_add_exp(v->sql->sa, pl, ne);
+                                                                       
list_remove_node(exps, NULL, n);
+                                                                       
v->changes++;
+                                                               }
+                                                       }
+                                               }
+                                       }
+                                       n = next;
+                               }
+
+                               /* cleanup list */
+                               list_destroy(aggColumns);
+                       }
+               }
        }
 
        /* try push select under set relation */
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to