Changeset: e8e7cbe1108a for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e8e7cbe1108a Modified Files: gdk/gdk_arrays.c gdk/gdk_arrays.h monetdb5/modules/kernel/arrays.c monetdb5/modules/kernel/arrays.h monetdb5/modules/kernel/arrays.mal sql/backends/monet5/sql_gencode.c Branch: arrays Log Message:
a dimension stores an array with all idxs that qualify in case they cannot be expressed with a range + subselect fixed to work with that diffs (truncated from 812 to 300 lines): diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c --- a/gdk/gdk_arrays.c +++ b/gdk/gdk_arrays.c @@ -50,6 +50,7 @@ gdk_dimension* createDimension_##TPE(TPE dim->max = floor((max - min ) / step); \ dim->step = 1; \ dim->elsNum = dim->max +1; \ + dim->idxs = NULL; \ return dim; \ } @@ -88,8 +89,11 @@ gdk_array* arrayCopy(gdk_array *array) { gdk_return arrayDelete(gdk_array *array) { unsigned short i=0; - for(i=0; i<array->dimsNum; i++) + for(i=0; i<array->dimsNum; i++) { + if(array->dims[i]->idxs) + GDKfree(array->dims[i]->idxs); GDKfree(array->dims[i]); + } GDKfree(array->dims); GDKfree(array); diff --git a/gdk/gdk_arrays.h b/gdk/gdk_arrays.h --- a/gdk/gdk_arrays.h +++ b/gdk/gdk_arrays.h @@ -32,6 +32,7 @@ typedef struct dimensionStruct { unsigned int max; unsigned int step; //initialy this is set to 1 unsigned int elsNum; + unsigned int *idxs; //if it cannot be expressed as a dimension store the idxs analytically } gdk_dimension; typedef struct arrayStruct { diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c --- a/monetdb5/modules/kernel/arrays.c +++ b/monetdb5/modules/kernel/arrays.c @@ -17,6 +17,7 @@ static int arrayCellsNum(gdk_array *arra return jumpSize(array, array->dimsNum); } +#if 0 /*UPDATED*/ static BUN oidToIdx(oid oidVal, int dimNum, int currentDimNum, BUN skipCells, gdk_array *array) { BUN oid = 0; @@ -35,6 +36,7 @@ static BUN oidToIdx(oid oidVal, int dimN return oid/skipCells; return oid%skipCells; } +#endif /*UPDATED*/ static BUN* oidToIdx_bulk(oid* oidVals, int valsNum, int dimNum, int currentDimNum, BUN skipCells, gdk_array *array) { @@ -77,303 +79,58 @@ static BUN* oidToIdx_bulk(oid* oidVals, return oids; } -/*UPDATED*/ -static str readCands(gdk_array** dimCands_res, BAT** oidCands_res, const ptr *dimCands, const bat *oidCands, gdk_array* array) { 
- gdk_array *dimCands_in = NULL; - BAT *candidatesBAT_in = NULL; - - if(oidCands) { //there are candidates that cannot be expressed as dimensions and thus are expressed with oids - if ((candidatesBAT_in = BATdescriptor(*oidCands)) == NULL) { - throw(MAL, "algebra.subselect", RUNTIME_OBJECT_MISSING); - } - } - - if(dimCands) //there are candidates exressed as dimensions - dimCands_in = (gdk_array*)*dimCands; - - //if there are no candidates then everything is a candidate - if(!dimCands && !oidCands) { - dimCands_in = arrayCopy(array); - //create an empy candidates BAT - if((candidatesBAT_in = BATnew(TYPE_void, TYPE_oid, 0, TRANSIENT)) == NULL) - throw(MAL, "algebra.subselect", GDK_EXCEPTION); - BATsetcount(candidatesBAT_in, 0); - BATseqbase(candidatesBAT_in, 0); - BATderiveProps(candidatesBAT_in, FALSE); - } - - *dimCands_res = dimCands_in; - *oidCands_res = candidatesBAT_in; - - return MAL_SUCCEED; +static gdk_dimension* updateDimCandRange(gdk_dimension *dimCand, unsigned int min, unsigned int max) { + dimCand->min = min > dimCand->min ? min : dimCand->min; + dimCand->max = max < dimCand->max ? max : dimCand->max; + dimCand->elsNum = min>max ? 
0 : max-min+1; //if 0 then the dimension is empty + return dimCand; } -/*UPDATED*/ -/*Empty candidate results are distingueshed from first time results by setting the dimCands to NULL and the oidCands to empty */ -static str emptyCandidateResults(ptr *dimCandsRes, bat* oidCandsRes) { - BAT *candidatesBAT = NULL; +static gdk_dimension* updateDimCandIdxs(gdk_dimension *dimCand, unsigned int min, unsigned int max) { + unsigned int elsNum = 0; + unsigned int *idxs = GDKmalloc(sizeof(unsigned int)*dimCand->elsNum); //at most (quailfyingIdx might alresy be out of the idxs) + unsigned int i; - if((candidatesBAT = BATnew(TYPE_void, TYPE_oid, 0, TRANSIENT)) == NULL) - throw(MAL, "algebra.dimensionSubselect", GDK_EXCEPTION); - BATsetcount(candidatesBAT, 0); - BATseqbase(candidatesBAT, 0); - BATderiveProps(candidatesBAT, FALSE); + for(i=0 ; i<dimCand->elsNum; i++) { + if(dimCand->idxs[i] >= min && dimCand->idxs[i] <= max) { + idxs[elsNum] = dimCand->idxs[i]; + elsNum++; + } + } + + //release the previous idxs + GDKfree(dimCand->idxs); + //store the new idxs + dimCand->elsNum = elsNum; //if 0 then the dimension is empty + dimCand->idxs = idxs; + //upadte the min max if needed + dimCand->min = min > dimCand->min ? min : dimCand->min; + dimCand->max = max < dimCand->max ? max : dimCand->max; - BBPkeepref(*oidCandsRes = candidatesBAT->batCacheid); - *dimCandsRes = NULL; - - return MAL_SUCCEED; + return dimCand; } -/*UPDATED*/ -static gdk_dimension* updateCandidateDimensionRange(gdk_dimension *dim, unsigned int min, unsigned int max) { - if(dim->max < min || dim->min > max) //non-overlaping ranges - return NULL; - - /*the biggest of the mins and the smallest of the maximums */ - dim->min = dim->min > min ? dim->min : min; - dim->max = dim->max < max ? 
dim->max : max; - dim->elsNum = floor((dim->max - dim->min)/dim->step)+1; -//TODO: Take care of cases were a dimension has step <>1 as a result of multiple selections on it - - //the dimensions that are merged should have the same order - //they also have the same number of initial elements because they came from the same dimension - return dim; -} - -static BAT* joinBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int dimNum) { - oid *candsOIDs, *dimOIDs, *mergedOIDs; - BAT* mergedBAT; - - BUN i=0, j=0, k=0, minPos; - BUN minIdx =0 ; - BUN dimSkip = jumpSize(array, dimNum); - - oid dimOIDs_min = 0; - bool set = 0; - BUN moduloRes; - - candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT)); - dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT)); - - //the oids in dimBAT have been computed assuming that 0 is allowed in all other dimensions - //is this really true? I can verify that using the first oid in candsBAT - //if a dimension after filtering is expressed again as a dimension the min might - //not be 0 but I do not care about it when it comes to the BAT. 
I will resolve tha at the end - //when projecting the cells where dimensions and BAT will be combined - for(j=0; j< (unsigned long)dimNum; j++) { - BUN skipCells = 0; - //find the min oid of this dimension in the the first qualifying oid - minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array); - if(minIdx == 0) - continue; - //all oids in the dimOIDs should be updated to comply with the min oid of the dimension - skipCells = jumpSize(array, j); - for(i=0; i<BATcount(dimBAT); i++) - dimOIDs[i] += skipCells*minIdx; - } - - if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid, BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT))) - return NULL; - mergedOIDs = (oid*)Tloc(mergedBAT, BUNfirst(mergedBAT)); - - moduloRes = dimOIDs[0]%dimSkip; - /* find the oids in cands that are there to reflect dimNum and keep only those that dim and cand have in common */ - for(i=0, j=0; i<BATcount(candsBAT) && j<BATcount(dimBAT);) { - /* oids in this dimension should be multiples of dimSkip */ - if(candsOIDs[i]%dimSkip == moduloRes) { - if(candsOIDs[i] < dimOIDs[j]) //it exists in one but not in the other - i++; - else if (candsOIDs[i] > dimOIDs[j]) - j++; - else { //common - mergedOIDs[k] = candsOIDs[i]; - - if(!set) { - dimOIDs_min = candsOIDs[i]; - minPos = k; - set = 1; - } - - i++; - j++; - k++; - } - } else { - /*not related with the dimension. send it to the output*/ - mergedOIDs[k] = candsOIDs[i]; - i++; - k++; - } - } - - BATseqbase(mergedBAT, 0); - BATsetcount(mergedBAT, k); - BATderiveProps(mergedBAT, FALSE); - - //adapt the candidates BAT to reflect the minimum value for the new dimension - //only the ones that are not reflecting the y dimension should be updates - minIdx = oidToIdx(dimOIDs_min, dimNum, 0, 1, array); - if(minIdx > 0) { - BUN skipCells = jumpSize(array, dimNum); - /*split it in 2 parts. 
Firts update all oids that are before the minPos position - * all those oids will be increased and become greater than the oid in minPos */ - if(minPos > 0) { - for(i=minPos-1; i>0; i--) { - /*all will change because they are above the minimum oid regarding the dimension*/ - mergedOIDs[i+1] = mergedOIDs[i]+skipCells*minIdx; - } - /* excluded from the loop because i>=0 always true (infinite loop)*/ - mergedOIDs[1] = mergedOIDs[0]+skipCells*minIdx; - } - mergedOIDs[0] = dimOIDs_min; - for(i=minPos+1; i<BATcount(mergedBAT) ; i++) - if(mergedOIDs[i]%dimSkip != moduloRes) - mergedOIDs[i] += skipCells*minIdx; - } - - return mergedBAT; -} - -static BAT* mergeBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int dimNum) { - oid *candsOIDs, *dimOIDs, *mergedOIDs; - BAT* mergedBAT; - - BUN i=0, j=0, k=0; - BUN minIdx =0 ; - - candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT)); - dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT)); - - //the oids in dimBAT have been computed assuming that 0 is allowed in all other dimensions - //is this really true? I can verify that using the first oid in candsBAT - //if a dimension after filtering is expressed again as a dimension the min might - //not be 0 but I do not care about it when it comes to the BAT. 
I will resolve tha at the end - //when projecting the cells where dimensions and BAT will be combined - for(j=0; j< (unsigned long)dimNum; j++) { - BUN skipCells = 0; - //find the min oid of this dimension in the the first qualifying oid - minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array); - if(minIdx == 0) - continue; - //all oids in the dimOIDs should be updated to comply with the min oid of the dimension - skipCells = jumpSize(array, j+1); - for(i=0; i<BATcount(dimBAT); i++) - dimOIDs[i] += skipCells*minIdx; - } - //adapt the candidates BAT to reflect the minimum value for the new dimension - minIdx = oidToIdx(dimOIDs[0], dimNum, 0, 1, array); - if(minIdx > 0) { - BUN skipCells = jumpSize(array, dimNum); - for(i=0; i<BATcount(candsBAT); i++) - candsOIDs[i] += skipCells*minIdx; - } - - //finaly merge the two BATs - if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid, BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT))) - return NULL; - mergedOIDs = (oid*)Tloc(mergedBAT, BUNfirst(mergedBAT)); - for(i=0, j=0; i<BATcount(candsBAT) && j<BATcount(dimBAT);) { - if(candsOIDs[i] < dimOIDs[j]) { - mergedOIDs[k] = candsOIDs[i]; - i++; - } else if(candsOIDs[i] > dimOIDs[j]) { - mergedOIDs[k] = dimOIDs[j]; - j++; - } else { - mergedOIDs[k] = candsOIDs[i]; - i++; - j++; - } - k++; - - if(i == BATcount(candsBAT)) { - for(; j<BATcount(dimBAT); j++, k++) - mergedOIDs[k] = dimOIDs[j]; - } - - if(j == BATcount(dimBAT)) { - for(; i<BATcount(dimBAT); i++, k++) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list