Changeset: 436d6d269f1a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=436d6d269f1a
Modified Files:
        monetdb5/optimizer/opt_groups.mx
Branch: default
Log Message:

Reorder grouping sequence
Reordering the grouping sequence using sampling and then increasing
sort of the columns on the number of unique values turned out best.
On SF100 Q16 the choice could improve performance with a factor 2.
The overhead of sampling is not neglible for smaller scale factors
On SF10 it turned 1.5 sec into 1.8sec.
Futher optimizations are possible.


diffs (91 lines):

diff --git a/monetdb5/optimizer/opt_groups.mx b/monetdb5/optimizer/opt_groups.mx
--- a/monetdb5/optimizer/opt_groups.mx
+++ b/monetdb5/optimizer/opt_groups.mx
@@ -169,23 +169,74 @@ GRPmulticolumngroup(Client cntxt, MalBlk
 {
        int *grp = (int*) getArgReference(stk,pci,0);
        int *ext = (int*) getArgReference(stk,pci,1);
-       int i, newgrp, newext;
+       int i, j, newgrp, newext;
        str msg = MAL_SUCCEED;
+       lng *sizes = (lng*) GDKzalloc(sizeof(lng) * pci->argc), l;
+       bat *bid = (bat*) GDKzalloc(sizeof(bat) * pci->argc), bi;
+       BUN *cnt = (BUN*) GDKzalloc(sizeof(BUN) * pci->argc), c;
+       BAT *b, *sample, *uniq;
+
+       for( i=2; i< pci->argc; i++){
+               bid[i] = *(int *) getArgReference(stk, pci, i);
+        b = BATdescriptor(bid[i]);
+               if ( b ){
+                       cnt[i]= BATcount(b);
+                       sample = BATsample(b,1000);
+                       if ( sample) {
+                               uniq = BATkunique( BATmirror(sample));
+                               if ( uniq){
+                                       sizes[i] = (lng) BATcount(uniq);
+                                       BBPreleaseref(uniq->batCacheid);
+                               }
+                               BBPreleaseref(sample->batCacheid);
+                       }
+                       BBPreleaseref(bid[i]);
+               }
+       }
+
+       /* for (i=2; i<pci->argc; i++)
+               mnstr_printf(cntxt->fdout,"# before[%d] "LLFMT"\n",i, 
sizes[i]); */
+       /* sort order may have influences */
+       /* SF100 Q16 showed < ordering is 2 times faster as > ordering */
+       for ( i = 2; i< pci->argc; i++)
+       for ( j = i+1; j<pci->argc; j++)
+       if ( sizes[j] < sizes[i]){
+               l = sizes[j]; sizes[j]= sizes[i]; sizes[i]= l;
+               bi = bid[j]; bid[j]= bid[i]; bid[i]= bi;
+               c = cnt[j]; cnt[j]= cnt[i]; cnt[i]= c;
+       }
+       /* for (i=2; i<pci->argc; i++)
+               mnstr_printf(cntxt->fdout,"# after [%d] "LLFMT"\n",i, 
sizes[i]); */
 
        /* (grp,ext) := group.new(..) */
-       msg = GRPgroup(grp, ext, (int*) getArgReference(stk,pci,2));
-
-       /* (grp,ext) := group.derive(grp,ext,arg) */
-       /* (grp,ext) := group.done(grp,ext,arg) */
-       for ( i=3; msg == MAL_SUCCEED && i < pci->argc; i++){
-               newgrp= *grp;
-               newext= *ext;
-               msg = GRPderive(grp, ext, &newgrp, &newext, (int*) 
getArgReference(stk,pci,i));
-               if ( msg == MAL_SUCCEED){
-                       BBPdecref(newgrp, TRUE);
-                       BBPdecref(newext, TRUE);
+       msg = GRPgroup(grp, ext, &bid[2]);
+       /* check group count */
+       b = ( msg == MAL_SUCCEED)?  BATdescriptor(*grp): 0;
+       if (  b && BATcount(b) != cnt[2]) {
+               BBPreleaseref(*grp);
+               /* (grp,ext) := group.derive(grp,ext,arg) */
+               /* (grp,ext) := group.done(grp,ext,arg) */
+               for ( i=3; msg == MAL_SUCCEED && i < pci->argc; i++){
+                       newgrp= *grp;
+                       newext= *ext;
+                       msg = GRPderive(grp, ext, &newgrp, &newext, &bid[i]);
+                       if ( msg == MAL_SUCCEED){
+                               BBPdecref(newgrp, TRUE);
+                               BBPdecref(newext, TRUE);
+                       }
+                       /* check group count */
+                       b = BATdescriptor(*grp);
+                       if ( b && BATcount(b) == cnt[i]){
+                               BBPreleaseref(*grp);
+                               break;
+                       }
+                       BBPreleaseref(*grp);
                }
-       }
+       } else
+               BBPreleaseref(*grp);
+       GDKfree(sizes);
+       GDKfree(bid);
+       GDKfree(cnt);
        (void) cntxt;
        (void) mb;
        return msg;
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to