Changeset: 9ac9b70563b0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9ac9b70563b0
Modified Files:
        monetdb5/mal/mal_instruction.h
        monetdb5/optimizer/opt_reorder.c
Branch: default
Log Message:

Improve the old reorder optimizer
The reorder is a simple logical grouping. All mitosis code is now organised by 
partition.
Within a partition instructions are put as close as possible to the last hot 
potato being generated.
This affects the queuing, avoiding doing work on other partitions, focussing on 
one partition at a time if possible.


diffs (truncated from 504 to 300 lines):

diff --git a/monetdb5/mal/mal_instruction.h b/monetdb5/mal/mal_instruction.h
--- a/monetdb5/mal/mal_instruction.h
+++ b/monetdb5/mal/mal_instruction.h
@@ -13,7 +13,7 @@
 #include "mal_stack.h"
 #include "mal_namespace.h"
 
-#define isaSignature(P)  ((P)->token >=COMMANDsymbol)
+#define isaSignature(P)  ((P)->token >=COMMANDsymbol || (P)->token == 
PATTERNsymbol)
 
 #ifdef HAVE_SYS_TIMES_H
 # include <sys/times.h>
diff --git a/monetdb5/optimizer/opt_reorder.c b/monetdb5/optimizer/opt_reorder.c
--- a/monetdb5/optimizer/opt_reorder.c
+++ b/monetdb5/optimizer/opt_reorder.c
@@ -18,349 +18,164 @@
  * to rely on an affordable heuristic steps.
  *
  * The reorder optimizer transfers the breadth-first plans of
- * the mergetable into a depth-first traversal.
+ * the mergetable into a multi-phase execution plan.
  * This increases cohesion for parallel execution.
- * It is performed by basic block that ends in a side-effect
- * bearing instruction.
- * Simply walking backward and pulling out subplans is then
- * the way to go. This step could be optimized somewhat
- * by giving preference to variables that are too far
- * away in the plan from their source. It is however not
- * explored.
- * The reorder is only executed if the mergetable produced results.
+ * A subquery is processed completely as quickly as possible.
+ * Only when the subquery is stalled for available input, the
+ * threads may start working on another subquery.
  *
- * The secondary approach is to pull instructions to the
- * head of the plan if the dataflow such permits.
- * If you are not careful, you will end up with
- * the breadth first plan again.
- *
- * Beware, variables can be assigned a value multiple times.
- * The order this implies should be respected.
- *
- * Hidden dependencies occur als with e.g. sql.assert() calls.
- * Reordering them may easily lead to an assertion violation.
- * Therefore, reordering should be limited to basic blocks without
- * side-effects.
  */
 #include "monetdb_config.h"
 #include "opt_reorder.h"
 #include "mal_instruction.h"
 #include "mal_interpreter.h"
-/*
- * Collect the statement dependencies in a table first
- * This can be done in linear time in size of the program.
- * Also check for barrier blocks. We only allow reordering
- * for a linear plan. Future extensions could consider
- * re-ordering basic blocks only.
- *
- * For this purpose this optimizer should be run before 
- * the dataflow optimizer, because it adds barriers.
- */
-typedef struct{
-       int cnt;
-       int used;
-       int pos,pos2;
-       int stmt[FLEXIBLE_ARRAY_MEMBER];
-} *Node, NodeRecord;
 
-static void
-OPTremoveDep(Node *list, int lim)
-{
-       int i;
-
-       for (i=0; i< lim; i++)
-               if (list[i])
-                       GDKfree(list[i]);
-       GDKfree(list);
-}
-
-static Node *
-OPTdependencies(Client cntxt, MalBlkPtr mb, int **Ulist)
-{
-       Node *list = (Node *) GDKzalloc(sizeof(Node) * mb->stop);
-       int *var = (int*) GDKzalloc(sizeof(int) * mb->vtop), *uselist = NULL;
-       int i,j,sz=0;
-       InstrPtr p = NULL;
-       int block = 0;
-
-       (void) cntxt;
-
-       if (list == NULL || var == NULL){
-               if (list ) GDKfree(list);
-               if (var) GDKfree(var);
-               return NULL;
-       }
-
-/* Instead of malloced structures for each instruction,
- * we can also consider a simple scan to determinee the max size
- * and allocate a single block
- */
 
-       for ( i=0; i< mb->stop; i++){
-               p= getInstrPtr(mb,i);
-               block |= p->barrier != 0;
-               list[i]= (Node) GDKzalloc(offsetof(NodeRecord, stmt) + 
sizeof(int) * p->argc);
-               if (list[i] == NULL){
-                       OPTremoveDep(list, i);
-                       GDKfree(var);
-                       return 0;
-               }
-               list[i]->cnt = p->argc;
-               /* Collect the dependencies of arguments on previous 
instructions */
-               for( j=p->retc; j<p->argc; j++) {
-                       list[i]->stmt[j] = var[getArg(p,j)];
-                       list[var[getArg(p,j)]]->used++;
-               }
-               /* keep the assignment order 
-                * Duplicate assignment of a variable is not wanted
-                * but can not be avoided when loops are involved
-                * In this case we bail out*/
-               for( j= 0; j < p->retc; j++) {
-                       if ( var[ getArg(p,j)] ) {
-                               //list[i]->stmt[j] = var [getArg(p,j)];
-                               // escape we should avoid reused variables.
-                               OPTremoveDep(list, i + 1);
-                               GDKfree(var);
-                               return 0;
-                       }
-               }
-               /* remember the statement where a variable was last assignment 
*/
-               for( j=0; j<p->retc; j++)
-                       var[getArg(p,j)] = i;
-       }
-       /*
-        *      mnstr_printf(cntxt->fdout,"DEPENDENCY TABLE\n");
-        *      for(i=0;i<mb->stop; i++)
-        *              if( list[i]->cnt){
-        *                      mnstr_printf(cntxt->fdout,"%s.%s [%d,%d]",
-        *                              mb->stmt[i]->modname,
-        *                              mb->stmt[i]->fcnname, i, list[i]->used);
-        *                      for(j=p->retc; j< list[i]->cnt; j++)
-        *                              mnstr_printf(cntxt->fdout, " %d", 
list[i]->stmt[j]);
-        *                      mnstr_printf(cntxt->fdout,"\n");
-        *              }
-        */
-       for(i=0;i<mb->stop; i++) {
-               list[i]->pos = sz;
-               list[i]->pos2 = sz;
-               sz += list[i]->used;
-       }
-       if( sz == 0){
-               OPTremoveDep(list, mb->stop);
-               GDKfree(var);
-               return NULL;
-       }
-       uselist = GDKzalloc(sizeof(int)*sz);
-       if (!uselist) {
-               OPTremoveDep(list, mb->stop);
-               GDKfree(var);
-               return NULL;
-       }
+/* Insert the instruction immediately after a previous instruction that
+ * generated an argument needed.
+ * If non can be found, add it to the end.
+ * Be aware of side-effect instructions, they may not be skipped.
+ */
+static void
+insertInstruction(MalBlkPtr mb, MalStkPtr stk, InstrPtr p, int start)
+{               
+        int i,j,k;
+        InstrPtr q;
+               (void) stk;
 
-       for(i=0;i<mb->stop; i++) {
-               if (list[i]->cnt) {
-                       p= getInstrPtr(mb,i);
-                       for(j=p->retc; j< list[i]->cnt; j++) {
-                               uselist[list[list[i]->stmt[j]]->pos2] = i;
-                               list[list[i]->stmt[j]]->pos2++;
-                       }
-               }
-       }
-       /*
-        *      for(i=0, sz = 0; i<mb->stop; i++) {
-        *              mnstr_printf(cntxt->fdout,"%d is used by", i);
-        *              for(j=0; j<list[i]->used; j++, sz++)
-        *                      mnstr_printf(cntxt->fdout," %d", uselist[sz]);
-        *              mnstr_printf(cntxt->fdout,"\n");
-        *      }
-        */
-
-       if ( block ){
-               OPTremoveDep(list, mb->stop);
-               GDKfree(uselist);
-               GDKfree(var);
-               return NULL;
-       }
-       GDKfree(var);
-       *Ulist = uselist;
-       return list;
-}
-
-static int
-OPTbreadthfirst(Client cntxt, MalBlkPtr mb, int pc, int max, InstrPtr old[], 
Node dep[], int *uselist)
-{
-       int i;
-       InstrPtr p;
-
-       if (pc > max)
-               return 0;
-
-       p = old[pc];
-       if (p == NULL)
-               return 0;
-
-       if (THRhighwater())
-               return -1;
-
-       for (i= p->retc; i< dep[pc]->cnt; i++)
-               if (OPTbreadthfirst(cntxt, mb, dep[pc]->stmt[i], max, old, dep, 
uselist) < 0)
-                       return -1;
-       if (old[pc] != NULL) {
-               old[pc] = 0;
-               pushInstruction(mb, p);
-       }
-       if (getModuleId(p) == groupRef)
-               for (i = 0; i< dep[pc]->used; i++)
-                       if (OPTbreadthfirst(cntxt, mb, uselist[dep[pc]->pos+i], 
max, old, dep, uselist) < 0)
-                               return -1;
-       return 0;
-}
-
-/* SQL appends are collected to create a better dataflow block */
-/* alternatively, we should postpone all mcv-chained actions */
-static int
-OPTpostponeAppends(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
-{
-       int i,j,k=0, actions =0, last=-1;
-       InstrPtr *old, *appends;
-       int limit, slimit;
-       (void) cntxt;
-       (void) stk;
-       (void) p;
-
-       appends =(InstrPtr*) GDKzalloc(mb->ssize * sizeof(InstrPtr));
-       if( appends == NULL)
-               return 0;
-       limit= mb->stop;
-       slimit= mb->ssize;
-       old = mb->stmt;
-       if ( newMalBlkStmt(mb, mb->ssize) < 0) {
-               GDKfree(appends);
-               return 0;
-       }
-       for( i=0; i<limit; i++){
-               if ( getModuleId(old[i]) == sqlRef && getFunctionId(old[i]) == 
appendRef){
-                       last = i;
-               }
-       }
-       for( i=0; i<limit; i++){
-               if ( getModuleId(old[i]) == sqlRef && getFunctionId(old[i]) == 
appendRef){
-                       // only postpone under strict conditions
-                       if( actions )
-                               pushInstruction(mb, old[i]);
-                       else {
-                               if (k > 0 &&  getArg(old[i],1) == 
getArg(appends[k-1],0))
-                                       appends[k++]= old[i];
-                               else {
-                                       for(j=0; j<k; j++)
-                                               pushInstruction(mb,appends[j]);
-                                       pushInstruction(mb, old[i]);
-                                       actions++;
+        if( hasSideEffects(mb, p, FALSE)){
+                pushInstruction(mb,p);
+                return;
+        }
+        for(i = mb->stop -1; i>0 && i>start; i--)
+        {
+                q= getInstrPtr(mb, i);
+                if( hasSideEffects(mb, q, FALSE))
+                        break;
+                               for (j=0; j< q->retc; j++){
+                                       for(k= p->retc; k <p->argc; k++){
+                                               if( getArg(q,j) == getArg(p,k)){
+                                                       /* found a location to 
insert the new instruction */
+                                                       for(i++; i>0 && i< 
mb->stop; i++){
+                                                               q= 
getInstrPtr(mb,i);
+                                                               mb->stmt[i]= p;
+                                                               p = q;
+                                                       }
+                                                       pushInstruction(mb,p);
+                                                       return;
+                                               }
+                                       }
                                }
-                       }
-                       continue;
-               }
-               if ( i == last){
-                       actions++;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to