Changeset: f4aa4ce5316d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f4aa4ce5316d Modified Files: gdk/gdk_arrays.c gdk/gdk_arrays.h monetdb5/modules/kernel/arrays.c monetdb5/modules/kernel/arrays.h monetdb5/modules/kernel/arrays.mal sql/backends/monet5/rel_bin.c sql/backends/monet5/sql_gencode.c Branch: arrays Log Message:
joining array dimensions with condition a.dim1=b.dim1 diffs (truncated from 701 to 300 lines): diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c --- a/gdk/gdk_arrays.c +++ b/gdk/gdk_arrays.c @@ -63,6 +63,26 @@ createDim(lng); createDim(dbl); createDim(flt); +#define createGCD(TPE) \ +TPE gcd_##TPE(TPE val1, TPE val2) { \ + TPE res; \ + if(val1 == val2) \ + return val1; \ + res=val1-val2; \ + if(res < val2) \ + return gcd_##TPE(val2,res); \ + return gcd_##TPE(res,val2); \ +} \ + +createGCD(bte); +createGCD(sht); +createGCD(int); +createGCD(wrd); +createGCD(oid); +createGCD(lng); +createGCD(dbl); +createGCD(flt); + gdk_array* arrayNew(unsigned short dimsNum) { gdk_array *array = (gdk_array*)GDKmalloc(sizeof(gdk_array)); diff --git a/gdk/gdk_arrays.h b/gdk/gdk_arrays.h --- a/gdk/gdk_arrays.h +++ b/gdk/gdk_arrays.h @@ -59,6 +59,15 @@ gdk_export gdk_return arrayDelete(gdk_ar gdk_export gdk_return analyticDimensionDelete(gdk_analytic_dimension *dim); gdk_export gdk_array* arrayCopy(gdk_array* array); +gdk_export bte gcd_bte(bte val1, bte val2); +gdk_export sht gcd_sht(sht val1, sht val2); +gdk_export int gcd_int(int val1, int val2); +gdk_export wrd gcd_wrd(wrd val1, wrd val2); +gdk_export oid gcd_oid(oid val1, oid val2); +gdk_export lng gcd_lng(lng val1, lng val2); +gdk_export dbl gcd_dbl(dbl val1, dbl val2); +gdk_export flt gcd_flt(flt val1, flt val2); + /*find the position in the dimension indices (no repetitions) of the given value*/ #define dimensionFndValuePos(value, min, step) fmod((value-min), step)? BUN_NONE : (BUN)(value-min)/step diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c --- a/monetdb5/modules/kernel/arrays.c +++ b/monetdb5/modules/kernel/arrays.c @@ -507,153 +507,58 @@ str ALGdimensionLeftfetchjoin2(bat *resu return ALGdimensionLeftfetchjoin1(result, dimsCands, dim, dims); } -str ALGdimensionLeftfetchjoin3(bat *result, const ptr* dimsCands, const bat* oidsCands, const ptr *array_in) { +str ALGdimensionLeftfetchjoin3(bat *result, const ptr* dimsCands, const ptr *array_in) { gdk_array *array = (gdk_array*)*array_in; gdk_array *dims_in = (gdk_array*)*dimsCands; - BAT *oidsCandsBAT = BATdescriptor(*oidsCands); BAT *resBAT; oid *resOids; - BUN elsNum = 1; - if(!oidsCandsBAT) { + /*count the cells in the output*/ + BUN elsNum = arrayCellsNum(dims_in); + /*create all oids*/ + if(!(resBAT = BATnew(TYPE_void, TYPE_oid, elsNum, TRANSIENT))) { arrayDelete(dims_in); - throw(MAL, "algebra.dimensionLeftfetchjoin2", RUNTIME_OBJECT_MISSING); + throw(MAL, "algebra.dimensionLeftfetchjoin2","Problem allocating new BAT"); } - /*this function is called in case of an update after a subselection - * on the dimensions. In such a case only the dimsCands should have - * values and the oidsCands should be empty*/ - /* - * the above is not true. When having update a set v =1 where x=y - * then the oidsCandsBAT is not empty - if(BATcount(oidsCandsBAT)) { - arrayDelete(dims_in); - throw(MAL,"algebra.dimensionLeftfetchjoin2", "oidsCands is not empty"); - }*/ + /* create the oids that should be updated based on the dimsCands */ + resOids = (oid*)Tloc(resBAT, BUNfirst(resBAT)); + /* fill the array with the elements in the first dimension */ + computeOids(&resOids, 0, array->dimsNum-1 , array, dims_in, arrayCellsNum(array)); - elsNum = BATcount(oidsCandsBAT); - if(!elsNum) { //there are no oids update - get oids from dimsCands - /*count the cells in the output*/ - elsNum = arrayCellsNum(dims_in); - /*create all oids*/ - if(!(resBAT = BATnew(TYPE_void, TYPE_oid, elsNum, TRANSIENT))) { - arrayDelete(dims_in); - throw(MAL, "algebra.dimensionLeftfetchjoin2","Problem allocating new BAT"); - } + BATsetcount(resBAT, elsNum); + BATseqbase(resBAT, 0); + resBAT->tsorted = 1; + resBAT->trevsorted = 0; + resBAT->tkey = 1; + resBAT->tdense = 0; - /* create the oids that should be updated based on the dimsCands */ - resOids = (oid*)Tloc(resBAT, BUNfirst(resBAT)); - /* fill the array with the elements in the first dimension */ - computeOids(&resOids, 0, array->dimsNum-1 , array, dims_in, arrayCellsNum(array)); - - BATsetcount(resBAT, elsNum); - BATseqbase(resBAT, 0); - resBAT->tsorted = 1; - resBAT->trevsorted = 0; - resBAT->tkey = 1; - resBAT->tdense = 0; - - BBPkeepref(*result = resBAT->batCacheid); - BBPunfix(oidsCandsBAT->batCacheid); - } else { - //there are oids, use only them - BBPkeepref(*result = oidsCandsBAT->batCacheid); - } + BBPkeepref(*result = resBAT->batCacheid); return MAL_SUCCEED; } -//str ALGnonDimensionLeftfetchjoin1(bat* result, const ptr *dimsCands, const bat *oidsCands, const bat *vals, const ptr *dims) { str ALGnonDimensionLeftfetchjoin1(bat* result, const bat *mbrOids, const bat *vals, const ptr *dims) { /* projecting a non-dimensional column does not differ from projecting any relational column */ - BAT *mbrCandsBAT = NULL/*, *oidsCandsBAT = NULL*/, *valsBAT = NULL, *resBAT= NULL; -// gdk_array *dimCands_in = (gdk_array*)*dimsCands; -// gdk_array *array = (gdk_array*)*dims; - -#if 0 - if(!dimCands_in) { //empty - if(!(resBAT = newempty("nonDimensionLeftfetchjoin1"))) - throw(MAL, "algebra.leftfetchjoin", "Problem allocating new BAT"); - BBPkeepref(*result = resBAT->batCacheid); - - return MAL_SUCCEED; - } -#endif + BAT *mbrCandsBAT = NULL, *valsBAT = NULL, *resBAT= NULL; (void)*dims; -// if ((oidsCandsBAT = BATdescriptor(*oidsCands)) == NULL) { -// throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); -// } - if ((valsBAT = BATdescriptor(*vals)) == NULL) { -// BBPunfix(oidsCandsBAT->batCacheid); throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); } if ((mbrCandsBAT = BATdescriptor(*mbrOids)) == NULL) { -// BBPunfix(oidsCandsBAT->batCacheid); BBPunfix(valsBAT->batCacheid); throw(MAL, "algebra.leftfetchjoin", RUNTIME_OBJECT_MISSING); } - //create the oids using the candidates -// mbrCandsBAT = projectCells(dimCands_in, array); - -#if 0 - if(BATcount(oidsCandsBAT)) { /*there is mbr and oids. Some values need to be set to null */ - BAT *r1p, *r2p, *r3p; - - //left outer join between the mbrCands and the oidsCands - if(BATsubouterjoin(&r1p, &r2p, mbrCandsBAT, oidsCandsBAT, NULL, NULL, 0 /*null never match*/, BATcount(mbrCandsBAT) /*the size of the resuls. BUN_NONE will cause it to be computed*/) != GDK_SUCCEED) { - BBPunfix(oidsCandsBAT->batCacheid); - BBPunfix(valsBAT->batCacheid); - BBPunfix(mbrCandsBAT->batCacheid); - } - r3p = BATproject(r2p, oidsCandsBAT); - resBAT = BATproject(r3p, valsBAT); - - BBPunfix(r1p->batCacheid); - BBPunfix(r2p->batCacheid); - BBPunfix(r3p->batCacheid); - } else -#endif - resBAT = BATproject(mbrCandsBAT, valsBAT); - -#if 0 - - resBAT = BATproject(mbrCandsBAT, valsBAT); - - if(BATcount(resBAT)) { //if there are no oidsCands then there is no MBR either and the result will be empty - /*iterate over mbrCandsBAT and oidsCandsBAT and set NULL to all - * values in resBAT that are found in mbrCandsBAT but not in oidsCandsBAT */ - BATiter mbrIter = bat_iterator(mbrCandsBAT); - BATiter oidsIter = bat_iterator(oidsCandsBAT); - - void *nilPtr = ATOMnilptr(BATttype(valsBAT)); - - BUN mbrCurrBun = BUNfirst(mbrCandsBAT); - BUN mbrLastBun = BUNlast(mbrCandsBAT); - BUN oidsCurrBun = BUNfirst(oidsCandsBAT); - BUN oidsLastBun = BUNlast(oidsCandsBAT); - for(; mbrCurrBun < mbrLastBun && oidsCurrBun < oidsLastBun ; mbrCurrBun++, oidsCurrBun++) { - oid mbrOid = *(oid*)BUNtail(mbrIter, mbrCurrBun); - oid oidsOid = *(oid*)BUNtail(oidsIter, oidsCurrBun); - - while(mbrOid != oidsOid && mbrCurrBun < mbrLastBun) { - BUNreplace(resBAT, &mbrCurrBun, nilPtr, FALSE); //replace with NULL - mbrCurrBun++; - mbrOid = *(oid*)BUNtail(mbrIter, mbrCurrBun); - } - } - } -#endif + resBAT = BATproject(mbrCandsBAT, valsBAT); BBPunfix(mbrCandsBAT->batCacheid); BBPunfix(valsBAT->batCacheid); -// BBPunfix(oidsCandsBAT->batCacheid); if (resBAT == NULL) throw(MAL, "algebra.leftfetchjoin", GDK_EXCEPTION); @@ -685,7 +590,6 @@ str ALGnonDimensionLeftfetchjoin2(bat* r throw(MAL, "algebra.leftfetchjoin", "Problem materialising non-dimensional column"); } - /*append the missing values to the BAT */ BATappend(nonDimensionalBAT, materialisedBAT, TRUE); BATsetcount(nonDimensionalBAT, totalCellsNum); @@ -1207,6 +1111,103 @@ str ALGouterjoin(bat *res, const bat *l, return MAL_SUCCEED; } +str ALGsubjoin1(ptr *dimsResL, ptr *dimsResR, const ptr *dimensionL, const ptr *dimsL, const ptr *dimensionR, const ptr *dimsR) { + gdk_analytic_dimension *analytic_dimensionL = (gdk_analytic_dimension*)*dimensionL; + gdk_analytic_dimension *analytic_dimensionR = (gdk_analytic_dimension*)*dimensionR; + gdk_dimension *dimL, *dimR; + + *dimsResL = *dimsL; + *dimsResR = *dimsR; + + dimL = ((gdk_array*)*dimsResL)->dims[analytic_dimensionL->dimNum]; + dimR = ((gdk_array*)*dimsResR)->dims[analytic_dimensionR->dimNum]; + +#define compareRanges(TPE) \ +do { \ + TPE minL = *(TPE*)analytic_dimensionL->min; \ + TPE maxL = *(TPE*)analytic_dimensionL->max; \ + TPE stepL = *(TPE*)analytic_dimensionL->step; \ + TPE minR = *(TPE*)analytic_dimensionR->min; \ + TPE maxR = *(TPE*)analytic_dimensionR->max; \ + TPE stepR = *(TPE*)analytic_dimensionR->step; \ +\ + TPE d = gcd_##TPE(stepR, stepL); \ + if(fmod((minL-minR),d)) {\ + dimR->min = dimL->min = dimR->max = dimL->max = 0; \ + dimR->step = dimL->step = 1; \ + } else { \ + /*find the min, max and step of the dimensions*/ \ + TPE l = minL, r=minR; \ + dimR->step = stepL/d; \ + dimL->step = stepR/d;\ + while(l != r) { \ + while(l<r) \ + l+=stepL; \ + while(r<l) \ + r+=stepR; \ + } \ + /*l = r*/ \ + dimL->min = (l-minL)/stepL; \ + dimR->min = (r-minR)/stepR; \ +\ + l = maxL, r=maxR; \ + while(l != r) { \ + while(l>r) \ + l-=stepL; \ + while(r>l) \ + r-=stepR; \ + } \ + /*l = r*/ \ + dimL->max = (l-minL)/stepL; \ + dimR->max = (r-minR)/stepR; \ + } \ +} while(0) + + switch(analytic_dimensionL->type) { + case TYPE_bte: + compareRanges(bte); + break; + case TYPE_sht: + compareRanges(sht); + break; + case TYPE_int: + compareRanges(int); + break; + case TYPE_wrd: _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list