Changeset: 0891a54a264d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0891a54a264d Added Files: monetdb5/modules/mal/groups.c monetdb5/modules/mal/groups.h monetdb5/modules/mal/joinpath.c monetdb5/modules/mal/joinpath.h Modified Files: monetdb5/modules/mal/Makefile.ag monetdb5/optimizer/opt_groups.c monetdb5/optimizer/opt_groups.h monetdb5/optimizer/opt_joinpath.c monetdb5/optimizer/opt_joinpath.h Branch: default Log Message:
Split optimizers from runtime Step towards making the optimizer directory focussed on the optimization process. The support part should go to modules/mal. diffs (truncated from 949 to 300 lines): diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag --- a/monetdb5/modules/mal/Makefile.ag +++ b/monetdb5/modules/mal/Makefile.ag @@ -38,9 +38,11 @@ lib_mal = { constraints.c constraints.h \ factories.c factories.h \ groupby.c groupby.h \ + groups.c groups.h \ histogram.h \ inspect.c inspect.h \ iterator.c iterator.h \ + joinpath.c joinpath.h \ language.c language.h \ mal_io.c mal_io.h \ mal_mapi.c mal_mapi.h \ diff --git a/monetdb5/modules/mal/groups.c b/monetdb5/modules/mal/groups.c new file mode 100644 --- /dev/null +++ b/monetdb5/modules/mal/groups.c @@ -0,0 +1,117 @@ +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2012 MonetDB B.V. + * All Rights Reserved. +*/ +#include "monetdb_config.h" +#include "groups.h" +#include "group.h" + +/* + * The groups optimizer takes a sequence and attempts to minimize the intermediate result. + * The choice depends on a good estimate of intermediate results using properties. + * We start by just performing the underlying MAL instructions in sequence as requested + * This will lead to better locality of BAT access. + */ +typedef struct{ + int *arg; + BAT *b; +} Elm; + +str +GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + int *grp = (int*) getArgReference(stk,pci,0); + int *ext = (int*) getArgReference(stk,pci,1); + int i, j, oldgrp, oldext; + str msg = MAL_SUCCEED; + lng *sizes = (lng*) GDKzalloc(sizeof(lng) * pci->argc), l; + bat *bid = (bat*) GDKzalloc(sizeof(bat) * pci->argc), bi; + BUN *cnt = (BUN*) GDKzalloc(sizeof(BUN) * pci->argc), c; + BAT *b, *sample, *uniq; + + for( i=2; i< pci->argc; i++){ + bid[i] = *(int *) getArgReference(stk, pci, i); + b = BATdescriptor(bid[i]); + if ( b ){ + cnt[i]= BATcount(b); + sample = BATsample(b,1000); + if ( sample) { + uniq = BATkunique( BATmirror(sample)); + if ( uniq){ + sizes[i] = (lng) BATcount(uniq); + BBPreleaseref(uniq->batCacheid); + } + BBPreleaseref(sample->batCacheid); + } + BBPreleaseref(bid[i]); + } + } + + /* for (i=2; i<pci->argc; i++) + mnstr_printf(cntxt->fdout,"# before[%d] "LLFMT"\n",i, sizes[i]); */ + /* sort order may have influences */ + /* SF100 Q16 showed < ordering is 2 times faster as > ordering */ + for ( i = 2; i< pci->argc; i++) + for ( j = i+1; j<pci->argc; j++) + if ( sizes[j] < sizes[i]){ + l = sizes[j]; sizes[j]= sizes[i]; sizes[i]= l; + bi = bid[j]; bid[j]= bid[i]; bid[i]= bi; + c = cnt[j]; cnt[j]= cnt[i]; cnt[i]= c; + } + /* for (i=2; i<pci->argc; i++) + mnstr_printf(cntxt->fdout,"# after [%d] "LLFMT"\n",i, sizes[i]); */ + + /* (grp,ext) := group.new(..) */ + *grp = 0; + *ext = 0; + msg = GRPgroup(grp, ext, &bid[2]); + if ( msg != MAL_SUCCEED){ + GDKfree(sizes); + GDKfree(bid); + GDKfree(cnt); + return msg; + } + /* check group count */ + b = BATdescriptor(*grp); + if ( b && BATcount(b) != cnt[2]) { + BBPreleaseref(*grp); + b = 0; + /* (grp,ext) := group.derive(grp,ext,arg) */ + /* (grp,ext) := group.done(grp,ext,arg) */ + for ( i=3; i < pci->argc; i++){ + oldgrp= *grp; + oldext= *ext; + msg = GRPderive(grp, ext, &oldgrp, &oldext, &bid[i]); + if ( msg == MAL_SUCCEED){ + BBPdecref(oldgrp, TRUE); + BBPdecref(oldext, TRUE); + } else break; + /* check group count */ + b = BATdescriptor(*grp); + if ( b && BATcount(b) == cnt[i]) + break; + } + } + if (b) + BBPreleaseref(*grp); + GDKfree(sizes); + GDKfree(bid); + GDKfree(cnt); + (void) cntxt; + (void) mb; + return msg; +} diff --git a/monetdb5/modules/mal/groups.h b/monetdb5/modules/mal/groups.h new file mode 100644 --- /dev/null +++ b/monetdb5/modules/mal/groups.h @@ -0,0 +1,36 @@ +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2012 MonetDB B.V. + * All Rights Reserved. +*/ +#ifndef _OPT_GROUPS_ +#define _OPT_GROUPS_ +#include "monetdb_config.h" +#include "mal_interpreter.h" + +#ifdef WIN32 +#if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) && !defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) && !defined(LIBMONETDB5) +#define groups_export extern __declspec(dllimport) +#else +#define groups_export extern __declspec(dllexport) +#endif +#else +#define groups_export extern +#endif + +groups_export str GRPmulticolumngroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); + +#endif diff --git a/monetdb5/modules/mal/joinpath.c b/monetdb5/modules/mal/joinpath.c new file mode 100644 --- /dev/null +++ b/monetdb5/modules/mal/joinpath.c @@ -0,0 +1,311 @@ +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2012 MonetDB B.V. + * All Rights Reserved. +*/ +/* + * Post-optimization. After the join path has been constructed + * we could search for common subpaths. This heuristic is to + * remove any pair which is used more than once. + * Inner paths are often foreign key walks. + * The heuristics is sufficient for the code produced by SQL frontend. + * The alternative is to search for all possible subpaths and materialize them. + * For example, using recursion for all common paths. + */ +#include "monetdb_config.h" +#include "joinpath.h" +//#include "cluster.h" + +/* + * The join path optimizer takes a join sequence and + * attempts to minimize the intermediate result. + * The choice depends on a good estimate of intermediate + * results using properties. + * For the time being, we use a simplistic model, based + * on the assumption that most joins are foreign key joins anyway. + * + * We use a sample based approach for sizeable tables. + * The model is derived from the select statement. However, we did not succeed. + * The code is now commented for future improvement. + * + * Final conclusion from this exercise is: + * The difference between the join input size and the join output size is not + * the correct (or unique) metric which should be used to decide which order + * should be used in the joinPath. + * A SMALL_OPERAND is preferrable set to those cases where the table + * fits in the cache. This depends on the cache size and operand type. + * For the time being we limit ourself to a default of 1Kelements + */ +/*#define SAMPLE_THRESHOLD_lOG 17*/ + +#define SMALL_OPERAND 1024 + +static BUN +ALGjoinCost(Client cntxt, BAT *l, BAT *r, int flag) +{ + BUN lc, rc; + BUN cost=0; +#if 0 + BUN lsize,rsize; + BAT *lsample, *rsample, *j; +#endif + + (void) flag; + lc = BATcount(l); + rc = BATcount(r); +#if 0 + /* The sampling method */ + if(flag < 2 && ( lc > 100000 || rc > 100000)){ + lsize= MIN(lc/100, (1<<SAMPLE_THRESHOLD_lOG)/3); + lsample= BATsample(l,lsize); + BBPreclaim(lsample); + rsize= MIN(rc/100, (1<<SAMPLE_THRESHOLD_lOG)/3); + rsample= BATsample(r,rsize); + BBPreclaim(rsample); + j= BATjoin(l,r, MAX(lsize,rsize)); + lsize= BATcount(j); + BBPreclaim(j); + return lsize; + } +#endif + + /* first use logical properties to estimate upper bound of result size */ + if (l->tkey && r->hkey) + cost = MIN(lc,rc); + else + if (l->tkey) + cost = rc; + else + if (r->hkey) + cost = lc; + else + if (lc * rc >= BUN_MAX) + cost = BUN_MAX; + else + cost = lc * rc; + + /* then use physical properties to rank costs */ + if (BATtdense(l) && BAThdense(r)) + /* densefetchjoin -> sequential access */ + cost /= 7; + else + if (BATtordered(l) && BAThdense(r)) + /* orderedfetchjoin > sequential access */ + cost /= 6; + else + if (BATtdense(l) && BAThordered(r) && flag != 0 /* no leftjoin */) + /* (reversed-) orderedfetchjoin -> sequential access */ + cost /= 6; + else + if (BAThdense(r) && rc <= SMALL_OPERAND) + /* fetchjoin with random access in L1 */ + cost /= 5; + else + if (BATtdense(l) && lc <= SMALL_OPERAND && flag != 0 /* no leftjoin */) _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list