Changeset: 874abcfa85e5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/874abcfa85e5
Modified Files:
        monetdb5/optimizer/opt_projectionpath.c
Branch: default
Log Message:

Documentation of a failed experiment
Out projectionpath should not be split


diffs (235 lines):

diff --git a/monetdb5/optimizer/opt_projectionpath.c 
b/monetdb5/optimizer/opt_projectionpath.c
--- a/monetdb5/optimizer/opt_projectionpath.c
+++ b/monetdb5/optimizer/opt_projectionpath.c
@@ -18,112 +18,123 @@
 // future experiments.
 //#define ELIMCOMMONPREFIX
 
-#define LOOKAHEAD 500   /* limit the lookahead for candidates */
-
-/* locate common prefixes  in projection lists
- * The algorithm is quadratic in the number of paths considered. */
-
 #ifdef ELIMCOMMONPREFIX
 static int
-OPTprojectionPrefix(Client cntxt, MalBlkPtr mb, int prefixlength)
+OPTprojectionPrefix(Client cntxt, MalBlkPtr mb)
 {
-       int i, j, k, match, actions=0;
-       InstrPtr p,q,r,*old;
+       int i, j, k, maxmatch, actions=0;
+       InstrPtr p,q,*old;
        int limit, slimit;
-       str msg = MAL_SUCCEED;
+       InstrPtr *paths = NULL;
+       int     *alias = NULL;
 
-
+       (void) cntxt;
        old = mb->stmt;
        limit = mb->stop;
        slimit= mb->ssize;
-       if (newMalBlkStmt(mb,mb->ssize) < 0)
+
+       paths = (InstrPtr *) GDKzalloc(mb->vsize * sizeof(InstrPtr));
+       if (paths == NULL)
+               return 0;
+       alias = (int*) GDKzalloc(mb->vsize * sizeof(int));
+       if( alias == NULL){
+               GDKfree(paths);
                return 0;
+       }
+
+       maxmatch = 0; // to collect maximum common paths
+       /* Collect the projection paths achored at the same start */
+       for( i=0; i< limit; i++){
+               p = old[i];
+               if ( getFunctionId(p) == projectionpathRef && p->argc > 3){
+                       k = getArg(p,1);
+                       if( paths[k] == 0)
+                               paths[k] = p;
+                       q = paths[k];
+                       // Calculate the number of almost identical paths
+                       if( q->argc == p->argc){
+                               for(j = q->retc; j<q->argc - 1; j++)
+                                       if( getArg(p,j) != getArg(q,j))
+                                               break;
+                               if( j == q->argc -1 ){
+                                       alias[k] = alias[k] -1;
+                                       if (alias[k] < maxmatch)
+                                               maxmatch = alias[k];
+                               }
+                       }
+               }
+       }
+       if (maxmatch == -1){
+               GDKfree(alias);
+               GDKfree(paths);
+               return 0;
+       }
+
+       if (newMalBlkStmt(mb,mb->ssize) < 0){
+               GDKfree(paths);
+               GDKfree(alias);
+               return 0;
+       }
 
 
        for( i = 0; i < limit; i++){
                p = old[i];
-               assert(p);
-               if ( getFunctionId(p) != projectionpathRef || p->argc < 
prefixlength) {
+               if ( getFunctionId(p) != projectionpathRef ){
+                       pushInstruction(mb,p);
+                       continue;
+               }
+               if( p->argc < 3){
                        pushInstruction(mb,p);
                        continue;
                }
 
-               /* we fixed a projection path of the target prefixlength
-                * Search now the remainder for at least one case where it
-                * has a common prefix of prefixlength
-                */
-               for(match = 0,  j= i+1; j < limit && j < i + LOOKAHEAD; j++) {
-                       q= old[j];
-                       if ( getFunctionId(q) != projectionpathRef || q->argc < 
prefixlength)
-                               continue;
-                       for( match =0,  k = q->retc; k <  prefixlength; k++)
-                               match += getArg(q,k) == getArg(p,k);
-                       if ( match == prefixlength - q->retc )
-                               break;
-                       match = 0;
+               actions++;
+               // the first one should be split if there is interest
+               k = getArg(p,1);
+               q = paths[k];
+               if( alias[k] < 0){
+                       // inject the join prefix calculation
+                       q= copyInstruction(q);
+                       q->argc = q->argc -1;
+                       getArg(q,0) = newTmpVariable(mb, getArgType(mb,q, 
q->argc -1));
+                       pushInstruction(mb, q);
+                       alias[k] = getArg(q,0);
+                       q = copyInstruction(p);
+                       getArg(q,1) = alias[k];
+                       getArg(q,2) = getArg(q, q->argc -1);
+                       q->argc = 3;
+                       pushInstruction(mb,q);
+                       continue;
                }
-               if ( match && match == prefixlength - q->retc ){
-                       /* at least one instruction has been found.
-                        * Inject the prefex projection path and replace all 
use cases
-                        */
-
-                       /* create the factored out prefix projection */
-                       r = copyInstruction(p);
-                       if( r == NULL){
-                               return -1;
+               // check if we can replace the projectionpath with an alias
+               k = getArg(p,1);
+               q = paths[k];
+               if( alias[k] >  0 && q->argc == p->argc){
+                       for(j = q->retc; j<q->argc - 1; j++)
+                               if( getArg(p,j) != getArg(q,j))
+                                       break;
+                       if( j == q->argc - 1){
+                               // we found a common prefix, and it is the 
first one?
+                               getArg(p,1) = alias[k];
+                               getArg(p,2) = getArg(p, p->argc -1);
+                               p->argc = 3;
+                               pushInstruction(mb,p);
                        }
-                       r->argc = prefixlength;
-                       getArg(r,0) = newTmpVariable(mb, 
newBatType(getBatType(getArgType(mb,r,r->argc-1))));
-                       if( r->argc == 3)
-                               setFunctionId(r,projectionRef);
-                       r->typechk = TYPE_UNKNOWN;
-                       pushInstruction(mb,r);
-
+               } else {
+                       pushInstruction(mb,p);
+                       continue;
+               }
 
-                       /* patch all instructions with same prefix. */
-                       for( ; j < limit; j++) {
-                               q= old[j];
-                               if ( getFunctionId(q) != projectionpathRef || 
q->argc < prefixlength)
-                                       continue;
-                               for( match =0,  k = r->retc; k < r->argc; k++)
-                                       match += getArg(q,k) == getArg(r,k);
-                               if (match &&  match == prefixlength - r->retc ){
-                                       actions++;
-                                       if( q->argc == r->argc ){
-                                               clrFunction(q);
-                                               getArg(q,q->retc) = getArg(r,0);
-                                               q->argc = q->retc + 1;
-                                       } else {
-                                               getArg(q,q->retc) = getArg(r,0);
-                                               for( k= q->retc +1 ; k < 
prefixlength; k++)
-                                                       delArgument(q, q->retc 
+ 1);
-                                               if( q->argc == 3)
-                                                       
setFunctionId(q,projectionRef);
-                                       }
-                               }
-                       }
-                       /* patch instruction p by deletion of common prefix */
-                       if( r->argc == p->argc ){
-                               clrFunction(p);
-                               getArg(p,p->retc) = getArg(r,0);
-                               p->argc = p->retc + 1;
-                       } else {
-                               getArg(p,p->retc) = getArg(r,0);
-                               for( k= p->retc +  1; k < prefixlength; k++)
-                                       delArgument(p, p->retc + 1);
-                               if( p->argc == 3)
-                                       setFunctionId(p,projectionRef);
-                       }
-               }
-               pushInstruction(mb,p);
        }
 
        for(; i<slimit; i++)
                if(old[i])
                        freeInstruction(old[i]);
        GDKfree(old);
-       if( actions)
-               actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0);
+       GDKfree(paths);
+       GDKfree(alias);
+       //if(actions)
+               //printFunction(cntxt->fdout, mb, 0, LIST_MAL_ALL);
        return actions;
 }
 #endif
@@ -272,19 +283,14 @@ OPTprojectionpathImplementation(Client c
 
        /* All complete projection paths have been constructed.
         * There may be cases where there is a common prefix used multiple 
times.
-        * They are located and removed in a few scans over the plan
-        *
-        * The prefix path mostly consist of smaller columns,
-        * which make the benefit not large. In SF100 roughly 100 out of
-        * 4500 projection operations were removed.
-        * On medium scale databases it may save cpu cycles.
-        * Turning this feature into a compile time option.
-               if( maxprefixlength > 3){
-                        //Before searching the prefix, we should remove all 
non-used instructions.
-                       actions += OPTdeadcodeImplementation(cntxt, mb, 0, 0);
-                       for( ; maxprefixlength > 2; maxprefixlength--)
-                               actions += OPTprojectionPrefix(cntxt, mb, 
maxprefixlength);
-               }
+        * Especially in wide table projections and lengthy paths
+        * They can be located and replaced in the plan by factoring the common 
part out
+        * First experiments on tpch SF10- Q7, showed significant decrease in 
performance
+        * Also a run against SF-100 did not show improvement in Q7
+        * Also there are collateral damages in the testweb.
+        if( OPTdeadcodeImplementation(cntxt, mb, 0, 0) == MAL_SUCCEED){
+               actions += OPTprojectionPrefix(cntxt, mb);
+       }
         */
 
     /* Defense line against incorrect plans */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to