Changeset: d638889e4eea for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d638889e4eea Modified Files: monetdb5/optimizer/opt_generator.c sql/backends/monet5/generator/generator.c Branch: default Log Message:
New optimizer for generate series. It now also recognizes simple casts over a series: instead of materializing the series, it introduces a new series with the cast bounds. Also fixed the way rangejoin grows its result BATs. diffs (truncated from 338 to 300 lines): diff --git a/monetdb5/optimizer/opt_generator.c b/monetdb5/optimizer/opt_generator.c --- a/monetdb5/optimizer/opt_generator.c +++ b/monetdb5/optimizer/opt_generator.c @@ -25,118 +25,142 @@ * Series generating module for integer, decimal, real, double and timestamps. */ +#define errorCheck(P,MOD,I) \ +setModuleId(P, generatorRef);\ +typeChecker(cntxt->fdout, cntxt->nspace, mb, P, TRUE);\ +if(P->typechk == TYPE_UNKNOWN){\ + setModuleId(P,MOD);\ + typeChecker(cntxt->fdout, cntxt->nspace, mb, P, TRUE);\ + setModuleId(series[I], generatorRef);\ + setFunctionId(series[I], seriesRef);\ + typeChecker(cntxt->fdout, cntxt->nspace, mb, series[I], TRUE);\ +}\ +pushInstruction(mb,P); -static int -assignedOnce(MalBlkPtr mb, int varid) -{ - InstrPtr p; - int i,j, c=0; - - for(i = 1; i< mb->stop; i++){ - p = getInstrPtr(mb,i); - for( j = 0; j < p->retc; j++) - if( getArg(p,j) == varid){ - c++; - break; - } - } - return c == 1; -} -static int -useCount(MalBlkPtr mb, int varid) -{ - InstrPtr p; - int i,j, d,c=0; - - for(i = 1; i< mb->stop; i++){ - p = getInstrPtr(mb,i); - d= 0; - for( j = p->retc; j < p->argc; j++) - if( getArg(p,j) == varid) - d++; - c += d > 0; - } - return c; -} +#define casting(TPE)\ + k= getArg(p,1);\ + p->argc = p->retc;\ + q= newStmt(mb,calcRef,TPE##Ref);\ + setArgType(mb,q,0,TYPE_##TPE);\ + pushArgument(mb,q,getArg(series[k],1));\ + p = pushArgument(mb,p, getArg(q,0));\ + q= newStmt(mb,calcRef,TPE##Ref);\ + setArgType(mb,q,0,TYPE_##TPE);\ + pushArgument(mb,q,getArg(series[k],2));\ + p = pushArgument(mb,p, getArg(q,0));\ + if( p->argc == 4){\ + q= newStmt(mb,calcRef,TPE##Ref);\ + setArgType(mb,q,0,TYPE_##TPE);\ + pushArgument(mb,q,getArg(series[k],3));\ + p = pushArgument(mb,p, getArg(q,0));\ + }\ + 
setModuleId(p,generatorRef);\ + setFunctionId(p,parametersRef);\ + setVarUDFtype(mb,getArg(p,0));\ + series[getArg(p,0)] = p;\ + pushInstruction(mb,p); int OPTgeneratorImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - InstrPtr p,q; - int i,j,k, actions=0, used, cases, blocked; + InstrPtr p,q, *old, *series; + int i, k, limit, actions=0; + str m; + str bteRef = getName("bte",3); + str shtRef = getName("sht",3); + str intRef = getName("int",3); + str lngRef = getName("lng",3); + str fltRef = getName("flt",3); + str dblRef = getName("dbl",3); (void) cntxt; (void) stk; (void) pci; - for( i=1; i < mb->stop; i++){ - p = getInstrPtr(mb,i); + series = (InstrPtr*) GDKzalloc(sizeof(InstrPtr) * mb->vtop); + old= mb->stmt; + limit = mb->stop; + if ( newMalBlkStmt(mb,2 * mb->ssize) < 0) { + GDKfree(series); + return 0; + } + + for( i=0; i < limit; i++){ + p = old[i]; + if ( p->token == ENDsymbol){ + pushInstruction(mb,p); + break; + } if ( getModuleId(p) == generatorRef && getFunctionId(p) == seriesRef){ - /* found a target for propagation */ - used = 0; - if ( assignedOnce(mb, getArg(p,0)) ){ - cases = useCount(mb, getArg(p,0)); - blocked = 0; - for( j = i+1; j< mb->stop && blocked == 0; j++){ - q = getInstrPtr(mb,j); - if ( getModuleId(q) == algebraRef && getFunctionId(q) == subselectRef && getArg(q,1) == getArg(p,0)){ - setModuleId(q, generatorRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, q, TRUE); - used++; - } else - if ( getModuleId(q) == algebraRef && getFunctionId(q) == thetasubselectRef && getArg(q,1) == getArg(p,0)){ - setModuleId(q, generatorRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, q, TRUE); - used++; - } else - if ( getModuleId(q) == algebraRef && getFunctionId(q) == leftfetchjoinRef && getArg(q,2) == getArg(p,0)){ - // projection over a series - setModuleId(q, generatorRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, q, TRUE); - used++; - } else - if ( getModuleId(q) == algebraRef && getFunctionId(q) == joinRef && 
(getArg(q,2) == getArg(p,0) || getArg(q,3) == getArg(p,0))){ - // projection over a series - setModuleId(q, generatorRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, q, TRUE); - if(q->typechk == TYPE_UNKNOWN){ - setModuleId(q, algebraRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, q, TRUE); - } else - used++; - } else - if ( getModuleId(q) == sqlRef && getFunctionId(q) == putName("exportValue",11) && isaBatType(getArgType(mb,p,0)) ){ - // interface expects scalar type only, not expressable in MAL signature - blocked++; - mb->errors++; - showException(cntxt->fdout, MAL, "generate_series", "internal error, generate_series is a table producing function"); - }else - if ( getModuleId(q) == languageRef && getFunctionId(q) == passRef && getArg(q,1) == getArg(p,0)) - // nothing happens in this instruction - used++; - else { - // check for use without conversion - for(k = q->retc; k < q->argc; k++) - if( getArg(q,k) == getArg(p,0)){ - blocked++; - } - // materialize a copy and re-use where appropriate - } + series[getArg(p,0)] = p; + setModuleId(p, generatorRef); + setFunctionId(p, parametersRef); + typeChecker(cntxt->fdout, cntxt->nspace, mb, p, TRUE); + pushInstruction(mb,p); + } else + if ( getModuleId(p) == algebraRef && getFunctionId(p) == subselectRef && series[getArg(p,1)]){ + errorCheck(p,algebraRef,getArg(p,1)); + } else + if ( getModuleId(p) == algebraRef && getFunctionId(p) == thetasubselectRef && series[getArg(p,1)]){ + errorCheck(p,algebraRef,getArg(p,1)); + } else + if ( getModuleId(p) == algebraRef && getFunctionId(p) == leftfetchjoinRef && series[getArg(p,2)]){ + errorCheck(p,algebraRef,getArg(p,2)); + } else + if ( getModuleId(p) == algebraRef && getFunctionId(p) == joinRef && series[getArg(p,2)] ){ + errorCheck(p,algebraRef,getArg(p,2)); + } else + if ( getModuleId(p) == algebraRef && getFunctionId(p) == joinRef && series[getArg(p,3)]){ + errorCheck(p,algebraRef,getArg(p,3)); + } else + if ( getModuleId(p) == sqlRef && getFunctionId(p) == 
putName("exportValue",11) && isaBatType(getArgType(mb,p,0)) ){ + // interface expects scalar type only, not expressable in MAL signature + mb->errors++; + showException(cntxt->fdout, MAL, "generate_series", "internal error, generate_series is a table producing function"); + }else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == bteRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(bte); + } else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == shtRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(sht); + } else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == intRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(int); + } else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == lngRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(lng); + } else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == fltRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(flt); + } else + if ( getModuleId(p) == batcalcRef && getFunctionId(p) == dblRef && series[getArg(p,1)] && p->argc == 2 ){ + casting(dbl); + } else + if ( getModuleId(p) == languageRef && getFunctionId(p) == passRef ) + pushInstruction(mb,p); + else { + // check for use without conversion + for(k = p->retc; k < p->argc; k++) + if( series[getArg(p,k)]){ + m = getModuleId(p); + setModuleId(p, generatorRef); + typeChecker(cntxt->fdout, cntxt->nspace, mb, p, TRUE); + if(p->typechk == TYPE_UNKNOWN){ + setModuleId(p,m); + typeChecker(cntxt->fdout, cntxt->nspace, mb, p, TRUE); + setModuleId(series[getArg(p,k)], generatorRef); + setFunctionId(series[getArg(p,k)], seriesRef); + typeChecker(cntxt->fdout, cntxt->nspace, mb, series[getArg(p,k)], TRUE); } - // fix the original, only when all use cases are replaced by the overloaded function - if(used == cases && blocked == 0){ - setModuleId(p, generatorRef); - setFunctionId(p, parametersRef); - typeChecker(cntxt->fdout, cntxt->nspace, mb, p, TRUE); - } - if( used) - actions++; -#ifdef VLT_DEBUG - 
mnstr_printf(cntxt->fdout,"#generator target %d cases %d used %d error %d\n",getArg(p,0), cases, used, p->typechk); -#endif } + pushInstruction(mb,p); } } + for (i++; i < limit; i++) + pushInstruction(mb, old[i]); + GDKfree(old); + GDKfree(series); + #ifdef VLT_DEBUG printFunction(cntxt->fdout,mb,0,LIST_MAL_ALL); #endif diff --git a/sql/backends/monet5/generator/generator.c b/sql/backends/monet5/generator/generator.c --- a/sql/backends/monet5/generator/generator.c +++ b/sql/backends/monet5/generator/generator.c @@ -968,13 +968,14 @@ str VLTgenerator_join(Client cntxt, MalB } #define VLTrangeExpand() \ -{ bln= BATextend(bln,BATgrows(bln));\ +{ limit+= cnt * (limit/(done?done:1)+1);\ + bln= BATextend(bln, limit);\ if( bln == NULL){\ BBPreleaseref(blow->batCacheid);\ BBPreleaseref(bhgh->batCacheid);\ throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);\ }\ - brn= BATextend(brn,BATgrows(brn));\ + brn= BATextend(brn, limit);\ if( brn == NULL) {\ BBPreleaseref(blow->batCacheid);\ BBPreleaseref(bhgh->batCacheid);\ @@ -982,7 +983,6 @@ str VLTgenerator_join(Client cntxt, MalB }\ ol = (oid*) Tloc(bln,BUNfirst(bln)) + c;\ or = (oid*) Tloc(brn,BUNfirst(brn)) + c;\ - limit= BATcapacity(bln);\ } /* The operands of a join operation can either be defined on a generator */ @@ -998,7 +998,7 @@ str VLTgenerator_join(Client cntxt, MalB throw(MAL,"generator.rangejoin","Illegal range");\ vlow = (TPE*) Tloc(blow,BUNfirst(blow));\ vhgh = (TPE*) Tloc(bhgh,BUNfirst(bhgh));\ - for( ; cnt >0; cnt--, o++,vlow++,vhgh++){\ + for( ; cnt >0; cnt--, done++, o++,vlow++,vhgh++){\ f1 = f + floor(ABS(*vlow-f)/ABS(s)) * s;\ if ( f1 < *vlow ) f1+= s;\ w = (BUN) floor(ABS(f1-f)/ABS(s));\ @@ -1015,7 +1015,7 @@ str VLTgenerator_rangejoin(Client cntxt, { BAT *blow = NULL, *bhgh = NULL, *bln = NULL, *brn= NULL; bit li,ri; - BUN limit, cnt,c =0; + BUN limit, cnt, done=0, c =0; oid o= 0, *ol, *or; int tpe, incr=0; InstrPtr p = NULL; @@ -1059,35 +1059,37 @@ str VLTgenerator_rangejoin(Client cntxt, /* The actual 
join code for generators be injected here */ switch(tpe){ case TYPE_bte: VLTrangejoin(bte,abs); break; - case TYPE_sht: //VLTrangejoin(sht,abs); break; - { sht f,f1,l,s; sht *vlow,*vhgh; BUN w; - f = *getArgReference_sht(stk,p, 1); - l = *getArgReference_sht(stk,p, 2); + case TYPE_sht: VLTrangejoin(sht,abs); break; + case TYPE_int: VLTrangejoin(int,abs); break; + case TYPE_lng: //VLTrangejoin(lng,llabs); break; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list