Changeset: 436d6d269f1a for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=436d6d269f1a Modified Files: monetdb5/optimizer/opt_groups.mx Branch: default Log Message:
Reorder grouping sequence Reordering the grouping sequence using sampling and then increasing sort of the columns on the number of unique values turned out best. On SF100 Q16 the choice could improve performance with a factor 2. The overhead of sampling is not neglible for smaller scale factors On SF10 it turned 1.5 sec into 1.8sec. Futher optimizations are possible. diffs (91 lines): diff --git a/monetdb5/optimizer/opt_groups.mx b/monetdb5/optimizer/opt_groups.mx --- a/monetdb5/optimizer/opt_groups.mx +++ b/monetdb5/optimizer/opt_groups.mx @@ -169,23 +169,74 @@ GRPmulticolumngroup(Client cntxt, MalBlk { int *grp = (int*) getArgReference(stk,pci,0); int *ext = (int*) getArgReference(stk,pci,1); - int i, newgrp, newext; + int i, j, newgrp, newext; str msg = MAL_SUCCEED; + lng *sizes = (lng*) GDKzalloc(sizeof(lng) * pci->argc), l; + bat *bid = (bat*) GDKzalloc(sizeof(bat) * pci->argc), bi; + BUN *cnt = (BUN*) GDKzalloc(sizeof(BUN) * pci->argc), c; + BAT *b, *sample, *uniq; + + for( i=2; i< pci->argc; i++){ + bid[i] = *(int *) getArgReference(stk, pci, i); + b = BATdescriptor(bid[i]); + if ( b ){ + cnt[i]= BATcount(b); + sample = BATsample(b,1000); + if ( sample) { + uniq = BATkunique( BATmirror(sample)); + if ( uniq){ + sizes[i] = (lng) BATcount(uniq); + BBPreleaseref(uniq->batCacheid); + } + BBPreleaseref(sample->batCacheid); + } + BBPreleaseref(bid[i]); + } + } + + /* for (i=2; i<pci->argc; i++) + mnstr_printf(cntxt->fdout,"# before[%d] "LLFMT"\n",i, sizes[i]); */ + /* sort order may have influences */ + /* SF100 Q16 showed < ordering is 2 times faster as > ordering */ + for ( i = 2; i< pci->argc; i++) + for ( j = i+1; j<pci->argc; j++) + if ( sizes[j] < sizes[i]){ + l = sizes[j]; sizes[j]= sizes[i]; sizes[i]= l; + bi = bid[j]; bid[j]= bid[i]; bid[i]= bi; + c = cnt[j]; cnt[j]= cnt[i]; cnt[i]= c; + } + /* for (i=2; i<pci->argc; i++) + mnstr_printf(cntxt->fdout,"# after [%d] "LLFMT"\n",i, sizes[i]); */ /* (grp,ext) := group.new(..) */ - msg = GRPgroup(grp, ext, (int*) getArgReference(stk,pci,2)); - - /* (grp,ext) := group.derive(grp,ext,arg) */ - /* (grp,ext) := group.done(grp,ext,arg) */ - for ( i=3; msg == MAL_SUCCEED && i < pci->argc; i++){ - newgrp= *grp; - newext= *ext; - msg = GRPderive(grp, ext, &newgrp, &newext, (int*) getArgReference(stk,pci,i)); - if ( msg == MAL_SUCCEED){ - BBPdecref(newgrp, TRUE); - BBPdecref(newext, TRUE); + msg = GRPgroup(grp, ext, &bid[2]); + /* check group count */ + b = ( msg == MAL_SUCCEED)? BATdescriptor(*grp): 0; + if ( b && BATcount(b) != cnt[2]) { + BBPreleaseref(*grp); + /* (grp,ext) := group.derive(grp,ext,arg) */ + /* (grp,ext) := group.done(grp,ext,arg) */ + for ( i=3; msg == MAL_SUCCEED && i < pci->argc; i++){ + newgrp= *grp; + newext= *ext; + msg = GRPderive(grp, ext, &newgrp, &newext, &bid[i]); + if ( msg == MAL_SUCCEED){ + BBPdecref(newgrp, TRUE); + BBPdecref(newext, TRUE); + } + /* check group count */ + b = BATdescriptor(*grp); + if ( b && BATcount(b) == cnt[i]){ + BBPreleaseref(*grp); + break; + } + BBPreleaseref(*grp); } - } + } else + BBPreleaseref(*grp); + GDKfree(sizes); + GDKfree(bid); + GDKfree(cnt); (void) cntxt; (void) mb; return msg; _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list