Changeset: 9ac9b70563b0 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/9ac9b70563b0 Modified Files: monetdb5/mal/mal_instruction.h monetdb5/optimizer/opt_reorder.c Branch: default Log Message:
Improve the old reorder optimizer The reorder is a simple logical grouping. All mitosis code is now organised by partition. Within a partition instructions are put as close as possible to the last hot potato being generated. This affects the queuing, avoiding doing work on other partitions, focussing on one partition at a time if possible. diffs (truncated from 504 to 300 lines): diff --git a/monetdb5/mal/mal_instruction.h b/monetdb5/mal/mal_instruction.h --- a/monetdb5/mal/mal_instruction.h +++ b/monetdb5/mal/mal_instruction.h @@ -13,7 +13,7 @@ #include "mal_stack.h" #include "mal_namespace.h" -#define isaSignature(P) ((P)->token >=COMMANDsymbol) +#define isaSignature(P) ((P)->token >=COMMANDsymbol || (P)->token == PATTERNsymbol) #ifdef HAVE_SYS_TIMES_H # include <sys/times.h> diff --git a/monetdb5/optimizer/opt_reorder.c b/monetdb5/optimizer/opt_reorder.c --- a/monetdb5/optimizer/opt_reorder.c +++ b/monetdb5/optimizer/opt_reorder.c @@ -18,349 +18,164 @@ * to rely on an affordable heuristic steps. * * The reorder optimizer transfers the breadth-first plans of - * the mergetable into a depth-first traversal. + * the mergetable into a multi-phase execution plan. * This increases cohesion for parallel execution. - * It is performed by basic block that ends in a side-effect - * bearing instruction. - * Simply walking backward and pulling out subplans is then - * the way to go. This step could be optimized somewhat - * by giving preference to variables that are too far - * away in the plan from their source. It is however not - * explored. - * The reorder is only executed if the mergetable produced results. + * A subquery is processed completely as quickly as possible. + * Only when the subquery is stalled for available input, the + * threads may start working on another subquery. * - * The secondary approach is to pull instructions to the - * head of the plan if the dataflow such permits. - * If you are not careful, you will end up with - * the breadth first plan again. - * - * Beware, variables can be assigned a value multiple times. - * The order this implies should be respected. - * - * Hidden dependencies occur als with e.g. sql.assert() calls. - * Reordering them may easily lead to an assertion violation. - * Therefore, reordering should be limited to basic blocks without - * side-effects. */ #include "monetdb_config.h" #include "opt_reorder.h" #include "mal_instruction.h" #include "mal_interpreter.h" -/* - * Collect the statement dependencies in a table first - * This can be done in linear time in size of the program. - * Also check for barrier blocks. We only allow reordering - * for a linear plan. Future extensions could consider - * re-ordering basic blocks only. - * - * For this purpose this optimizer should be run before - * the dataflow optimizer, because it adds barriers. - */ -typedef struct{ - int cnt; - int used; - int pos,pos2; - int stmt[FLEXIBLE_ARRAY_MEMBER]; -} *Node, NodeRecord; -static void -OPTremoveDep(Node *list, int lim) -{ - int i; - - for (i=0; i< lim; i++) - if (list[i]) - GDKfree(list[i]); - GDKfree(list); -} - -static Node * -OPTdependencies(Client cntxt, MalBlkPtr mb, int **Ulist) -{ - Node *list = (Node *) GDKzalloc(sizeof(Node) * mb->stop); - int *var = (int*) GDKzalloc(sizeof(int) * mb->vtop), *uselist = NULL; - int i,j,sz=0; - InstrPtr p = NULL; - int block = 0; - - (void) cntxt; - - if (list == NULL || var == NULL){ - if (list ) GDKfree(list); - if (var) GDKfree(var); - return NULL; - } - -/* Instead of malloced structures for each instruction, - * we can also consider a simple scan to determinee the max size - * and allocate a single block - */ - for ( i=0; i< mb->stop; i++){ - p= getInstrPtr(mb,i); - block |= p->barrier != 0; - list[i]= (Node) GDKzalloc(offsetof(NodeRecord, stmt) + sizeof(int) * p->argc); - if (list[i] == NULL){ - OPTremoveDep(list, i); - GDKfree(var); - return 0; - } - list[i]->cnt = p->argc; - /* Collect the dependencies of arguments on previous instructions */ - for( j=p->retc; j<p->argc; j++) { - list[i]->stmt[j] = var[getArg(p,j)]; - list[var[getArg(p,j)]]->used++; - } - /* keep the assignment order - * Duplicate assignment of a variable is not wanted - * but can not be avoided when loops are involved - * In this case we bail out*/ - for( j= 0; j < p->retc; j++) { - if ( var[ getArg(p,j)] ) { - //list[i]->stmt[j] = var [getArg(p,j)]; - // escape we should avoid reused variables. - OPTremoveDep(list, i + 1); - GDKfree(var); - return 0; - } - } - /* remember the statement where a variable was last assignment */ - for( j=0; j<p->retc; j++) - var[getArg(p,j)] = i; - } - /* - * mnstr_printf(cntxt->fdout,"DEPENDENCY TABLE\n"); - * for(i=0;i<mb->stop; i++) - * if( list[i]->cnt){ - * mnstr_printf(cntxt->fdout,"%s.%s [%d,%d]", - * mb->stmt[i]->modname, - * mb->stmt[i]->fcnname, i, list[i]->used); - * for(j=p->retc; j< list[i]->cnt; j++) - * mnstr_printf(cntxt->fdout, " %d", list[i]->stmt[j]); - * mnstr_printf(cntxt->fdout,"\n"); - * } - */ - for(i=0;i<mb->stop; i++) { - list[i]->pos = sz; - list[i]->pos2 = sz; - sz += list[i]->used; - } - if( sz == 0){ - OPTremoveDep(list, mb->stop); - GDKfree(var); - return NULL; - } - uselist = GDKzalloc(sizeof(int)*sz); - if (!uselist) { - OPTremoveDep(list, mb->stop); - GDKfree(var); - return NULL; - } +/* Insert the instruction immediately after a previous instruction that + * generated an argument needed. + * If non can be found, add it to the end. + * Be aware of side-effect instructions, they may not be skipped. + */ +static void +insertInstruction(MalBlkPtr mb, MalStkPtr stk, InstrPtr p, int start) +{ + int i,j,k; + InstrPtr q; + (void) stk; - for(i=0;i<mb->stop; i++) { - if (list[i]->cnt) { - p= getInstrPtr(mb,i); - for(j=p->retc; j< list[i]->cnt; j++) { - uselist[list[list[i]->stmt[j]]->pos2] = i; - list[list[i]->stmt[j]]->pos2++; - } - } - } - /* - * for(i=0, sz = 0; i<mb->stop; i++) { - * mnstr_printf(cntxt->fdout,"%d is used by", i); - * for(j=0; j<list[i]->used; j++, sz++) - * mnstr_printf(cntxt->fdout," %d", uselist[sz]); - * mnstr_printf(cntxt->fdout,"\n"); - * } - */ - - if ( block ){ - OPTremoveDep(list, mb->stop); - GDKfree(uselist); - GDKfree(var); - return NULL; - } - GDKfree(var); - *Ulist = uselist; - return list; -} - -static int -OPTbreadthfirst(Client cntxt, MalBlkPtr mb, int pc, int max, InstrPtr old[], Node dep[], int *uselist) -{ - int i; - InstrPtr p; - - if (pc > max) - return 0; - - p = old[pc]; - if (p == NULL) - return 0; - - if (THRhighwater()) - return -1; - - for (i= p->retc; i< dep[pc]->cnt; i++) - if (OPTbreadthfirst(cntxt, mb, dep[pc]->stmt[i], max, old, dep, uselist) < 0) - return -1; - if (old[pc] != NULL) { - old[pc] = 0; - pushInstruction(mb, p); - } - if (getModuleId(p) == groupRef) - for (i = 0; i< dep[pc]->used; i++) - if (OPTbreadthfirst(cntxt, mb, uselist[dep[pc]->pos+i], max, old, dep, uselist) < 0) - return -1; - return 0; -} - -/* SQL appends are collected to create a better dataflow block */ -/* alternatively, we should postpone all mcv-chained actions */ -static int -OPTpostponeAppends(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p) -{ - int i,j,k=0, actions =0, last=-1; - InstrPtr *old, *appends; - int limit, slimit; - (void) cntxt; - (void) stk; - (void) p; - - appends =(InstrPtr*) GDKzalloc(mb->ssize * sizeof(InstrPtr)); - if( appends == NULL) - return 0; - limit= mb->stop; - slimit= mb->ssize; - old = mb->stmt; - if ( newMalBlkStmt(mb, mb->ssize) < 0) { - GDKfree(appends); - return 0; - } - for( i=0; i<limit; i++){ - if ( getModuleId(old[i]) == sqlRef && getFunctionId(old[i]) == appendRef){ - last = i; - } - } - for( i=0; i<limit; i++){ - if ( getModuleId(old[i]) == sqlRef && getFunctionId(old[i]) == appendRef){ - // only postpone under strict conditions - if( actions ) - pushInstruction(mb, old[i]); - else { - if (k > 0 && getArg(old[i],1) == getArg(appends[k-1],0)) - appends[k++]= old[i]; - else { - for(j=0; j<k; j++) - pushInstruction(mb,appends[j]); - pushInstruction(mb, old[i]); - actions++; + if( hasSideEffects(mb, p, FALSE)){ + pushInstruction(mb,p); + return; + } + for(i = mb->stop -1; i>0 && i>start; i--) + { + q= getInstrPtr(mb, i); + if( hasSideEffects(mb, q, FALSE)) + break; + for (j=0; j< q->retc; j++){ + for(k= p->retc; k <p->argc; k++){ + if( getArg(q,j) == getArg(p,k)){ + /* found a location to insert the new instruction */ + for(i++; i>0 && i< mb->stop; i++){ + q= getInstrPtr(mb,i); + mb->stmt[i]= p; + p = q; + } + pushInstruction(mb,p); + return; + } + } } - } - continue; - } - if ( i == last){ - actions++; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list