Changeset: d6b63e4815ff for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d6b63e4815ff Modified Files: gdk/gdk_arrays.c monetdb5/modules/kernel/arrays.c Branch: arrays Log Message:
point selections with anti that destroy the dimension property combined with other point and range selections diffs (truncated from 504 to 300 lines): diff --git a/gdk/gdk_arrays.c b/gdk/gdk_arrays.c --- a/gdk/gdk_arrays.c +++ b/gdk/gdk_arrays.c @@ -9,6 +9,17 @@ gdk_dimension* createDimension_##TPE(int void *maxVoid = GDKmalloc(sizeof(TPE)); \ void *stepVoid = GDKmalloc(sizeof(TPE)); \ \ + if(!dim || !minVoid || !maxVoid || !stepVoid) { \ + if(dim) \ + GDKfree(dim); \ + if(minVoid) \ + GDKfree(minVoid); \ + if(maxVoid) \ + GDKfree(maxVoid); \ + if(stepVoid) \ + GDKfree(stepVoid); \ + return NULL; \ + } \ memcpy(minVoid, &min, sizeof(TPE)); \ memcpy(maxVoid, &max, sizeof(TPE)); \ memcpy(stepVoid, &step, sizeof(TPE)); \ diff --git a/monetdb5/modules/kernel/arrays.c b/monetdb5/modules/kernel/arrays.c --- a/monetdb5/modules/kernel/arrays.c +++ b/monetdb5/modules/kernel/arrays.c @@ -7,10 +7,18 @@ static gdk_dimension* getDimension(gdk_cells *dims, int dimNum) { dim_node *n; - for(n=dims->h; n->data->dimNum<dimNum; n=n->next); + for(n=dims->h; n && n->data->dimNum<dimNum; n=n->next); return n->data; } +static int jumpSize(gdk_array *array, int dimNum) { + int i=0; + BUN skip = 1; + for(i=0; i<dimNum; i++) + skip *= array->dimSizes[i]; + return skip; +} + /*takes a set of dimensions of any type and returns the correspondin * dimensions in indices format */ static gdk_cells* sizesToDimensions(gdk_array *array) { @@ -24,7 +32,7 @@ static gdk_cells* sizesToDimensions(gdk_ return resDims; } -static bool compatibleRanges(gdk_dimension *dim1, gdk_dimension *dim2) { +static bool overlapingRanges(gdk_dimension *dim1, gdk_dimension *dim2) { if(*(oid*)dim1->max < *(oid*)dim2->min || *(oid*)dim2->max < *(oid*)dim1->min) { //disjoint ranges. empty result return 0; @@ -32,25 +40,18 @@ static bool compatibleRanges(gdk_dimensi return 1; } -static gdk_dimension* merge2CandidateDimensions(gdk_dimension *dim1, gdk_dimension *dim2) { +static gdk_dimension* mergeCandidateDimensions(gdk_dimension *dim1, gdk_dimension *dim2) { oid min, max, step; - //check whether the merge creates a dimension or not - //In case it does not return NULL and then a BAT should be created - /*the biggest of the mins and the smallest of the maximums */ min = *(oid*)dim1->min > *(oid*)dim2->min ? *(oid*)dim1->min : *(oid*)dim2->min; max = *(oid*)dim1->max < *(oid*)dim2->max ? *(oid*)dim1->max : *(oid*)dim2->max; step = *(oid*)dim1->step ; //step is always 1 //the dimensions that are merged should have the same order - //they also have the same number of initial elements because the came from the same dimension + //they also have the same number of initial elements because they came from the same dimension return createDimension_oid(dim1->dimNum, dim1->initialElementsNum, min, max, step); } -static gdk_dimension* mergeCandidateDimensions(gdk_dimension *dim1, gdk_dimension *dim2) { - return merge2CandidateDimensions(dim1, dim2); -} - static BUN oidToIdx(oid oidVal, int dimNum, int currentDimNum, BUN skipCells, gdk_array *dims) { BUN oid = 0; @@ -69,7 +70,7 @@ static BUN oidToIdx(oid oidVal, int dimN return oid%skipCells; } -static BUN qualifyingOIDs(int dimNum, int skipSize, gdk_cells* oidDims, oid **resOIDs ) { +static BUN qualifyingOIDs(int dimNum, int skipSize, gdk_cells* oidDims, BAT* oidsBAT, oid **resOIDs ) { BUN sz = 0; BUN j; oid io; @@ -82,25 +83,65 @@ static BUN qualifyingOIDs(int dimNum, in oidsDim = n->data; if(dimNum == oidDims->dimsNum-1) { //last dimension - qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum); - for(io=*(oid*)oidsDim->min, sz=0; io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) { - qOIDS[sz] = skipSize*io; + if(oidsDim->elementsNum > 0) { + qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum); + for(io=*(oid*)oidsDim->min, sz=0; io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) { + qOIDS[sz] = skipSize*io; fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]); + } + } else { + //the oids are in the BAT + //iterate over the elements in the BAT to find the indices of the dimension that have survided + //and add and idx for each one of these elements + oid* candOIDs = (oid*)Tloc(oidsBAT, BUNfirst(oidsBAT)); + int previousIdx = -1; + BUN i; + qOIDS = GDKmalloc(sizeof(oid)*oidsDim->initialElementsNum); + + for(i=0; i<BATcount(oidsBAT); i++) { + int idx = candOIDs[i]/skipSize; + if(idx > previousIdx) { + qOIDS[sz] = skipSize*idx; +fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]); + sz++; + previousIdx = idx; + } + } } *resOIDs = qOIDS; } else { oid *resOIDs_local = NULL; - BUN addedEls = qualifyingOIDs(dimNum+1, skipSize*oidsDim->initialElementsNum, oidDims, &resOIDs_local); + BUN addedEls = qualifyingOIDs(dimNum+1, skipSize*oidsDim->initialElementsNum, oidDims, oidsBAT, &resOIDs_local); if(dimNum == 0) qOIDS = *resOIDs; - else - qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum*addedEls); + else { + if(oidsDim->elementsNum > 0) + qOIDS = GDKmalloc(sizeof(oid)*oidsDim->elementsNum*addedEls); + else + qOIDS = GDKmalloc(sizeof(oid)*oidsDim->initialElementsNum*addedEls); + } for(j=0, sz=0; j<addedEls; j++) { fprintf(stderr, "-> %u = %u\n", (unsigned int)j, (unsigned int)resOIDs_local[j]); - for(io=*(oid*)oidsDim->min; io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) { - qOIDS[sz] = resOIDs_local[j] + skipSize*io; + if(oidsDim->elementsNum > 0) { + for(io=*(oid*)oidsDim->min; io<=*(oid*)oidsDim->max; io+=*(oid*)oidsDim->step, sz++) { + qOIDS[sz] = resOIDs_local[j] + skipSize*io; fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]); + } + } else { + //check the BAT + oid* candOIDs = (oid*)Tloc(oidsBAT, BUNfirst(oidsBAT)); + int previousIdx = -1; + BUN i; + for(i=0; i<BATcount(oidsBAT); i++) { + int idx = (candOIDs[i]%(skipSize*oidsDim->initialElementsNum))/skipSize; + if(idx > previousIdx) { + qOIDS[sz] = resOIDs_local[j]+skipSize*idx; +fprintf(stderr, "%u = %u\n", (unsigned int)sz, (unsigned int)qOIDS[sz]); + sz++; + previousIdx = idx; + } + } } } *resOIDs = qOIDS; @@ -196,20 +237,216 @@ static str emptyCandidateResults(ptr *ca return MAL_SUCCEED; } -static bool updateCandidateResults(gdk_cells** dimensionsCandidates_out, BAT** candidatesBAT_out, +static BAT* joinBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int dimNum) { + oid *candsOIDs, *dimOIDs, *mergedOIDs; + BAT* mergedBAT; + + BUN i=0, j=0, k=0, minPos; + BUN minIdx =0 ; + BUN dimSkip = jumpSize(array, dimNum); + + oid dimOIDs_min = 0; + bool set = 0; + BUN moduloRes; + + candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT)); + dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT)); + + //the oids in dimBAT have been computed assuming that 0 is allowed in all other dimensions + //is this really true? I can verify that using the first oid in candsBAT + //if a dimension after filtering is expressed again as a dimension the min might + //not be 0 but I do not care about it when it comes to the BAT. I will resolve tha at the end + //when projecting the cells where dimensions and BAT will be combined + for(j=0; j< (unsigned long)dimNum; j++) { + BUN skipCells = 0; + //find the min oid of this dimension in the the first qualifying oid + minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array); + if(minIdx == 0) + continue; + //all oids in the dimOIDs should be updated to comply with the min oid of the dimension + skipCells = jumpSize(array, j); + for(i=0; i<BATcount(dimBAT); i++) + dimOIDs[i] += skipCells*minIdx; + } + + if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid, BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT))) + return NULL; + mergedOIDs = (oid*)Tloc(mergedBAT, BUNfirst(mergedBAT)); + + moduloRes = dimOIDs[0]%dimSkip; + /* find the oids in cands that are there to reflect dimNum and keep only those that dim and cand have in common */ + for(i=0, j=0; i<BATcount(candsBAT) && j<BATcount(dimBAT);) { + /* oids in this dimension should be multiples of dimSkip */ + if(candsOIDs[i]%dimSkip == moduloRes) { + if(candsOIDs[i] < dimOIDs[j]) //it exists in one but not in the other + i++; + else if (candsOIDs[i] > dimOIDs[j]) + j++; + else { //common + mergedOIDs[k] = candsOIDs[i]; + + if(!set) { + dimOIDs_min = candsOIDs[i]; + minPos = k; + set = 1; + } + + i++; + j++; + k++; + } + } else { + /*not related with the dimension. send it to the output*/ + mergedOIDs[k] = candsOIDs[i]; + i++; + k++; + } + } + + BATseqbase(mergedBAT, 0); + BATsetcount(mergedBAT, k); + BATderiveProps(mergedBAT, FALSE); + + //adapt the candidates BAT to reflect the minimum value for the new dimension + //only the ones that are not reflecting the y dimension should be updates + minIdx = oidToIdx(dimOIDs_min, dimNum, 0, 1, array); + if(minIdx > 0) { + BUN skipCells = jumpSize(array, dimNum); + /*split it in 2 parts. Firts update all oids that are before the minPos position + * all those oids will be increased and become greater than the oid in minPos */ + if(minPos > 0) { + for(i=minPos-1; i>0; i--) { + /*all will change because they are above the minimum oid regarding the dimension*/ + mergedOIDs[i+1] = mergedOIDs[i]+skipCells*minIdx; + } + /* excluded from the loop because i>=0 always true (infinite loop)*/ + mergedOIDs[1] = mergedOIDs[0]+skipCells*minIdx; + } + mergedOIDs[0] = dimOIDs_min; + for(i=minPos+1; i<BATcount(mergedBAT) ; i++) + if(mergedOIDs[i]%dimSkip != moduloRes) + mergedOIDs[i] += skipCells*minIdx; + } + + return mergedBAT; +} + +static BAT* mergeBATs(BAT *candsBAT, BAT* dimBAT, gdk_array *array, int dimNum) { + oid *candsOIDs, *dimOIDs, *mergedOIDs; + BAT* mergedBAT; + + BUN i=0, j=0, k=0; + BUN minIdx =0 ; + + candsOIDs = (oid*)Tloc(candsBAT, BUNfirst(candsBAT)); + dimOIDs = (oid*)Tloc(dimBAT, BUNfirst(dimBAT)); + + //the oids in dimBAT have been computed assuming that 0 is allowed in all other dimensions + //is this really true? I can verify that using the first oid in candsBAT + //if a dimension after filtering is expressed again as a dimension the min might + //not be 0 but I do not care about it when it comes to the BAT. I will resolve tha at the end + //when projecting the cells where dimensions and BAT will be combined + for(j=0; j< (unsigned long)dimNum; j++) { + BUN skipCells = 0; + //find the min oid of this dimension in the the first qualifying oid + minIdx = oidToIdx(candsOIDs[0], j+1, 0, 1, array); + if(minIdx == 0) + continue; + //all oids in the dimOIDs should be updated to comply with the min oid of the dimension + skipCells = jumpSize(array, j+1); + for(i=0; i<BATcount(dimBAT); i++) + dimOIDs[i] += skipCells*minIdx; + } + //adapt the candidates BAT to reflect the minimum value for the new dimension + minIdx = oidToIdx(dimOIDs[0], dimNum, 0, 1, array); + if(minIdx > 0) { + BUN skipCells = jumpSize(array, dimNum); + for(i=0; i<BATcount(candsBAT); i++) + candsOIDs[i] += skipCells*minIdx; + } + + //finaly merge the two BATs + if(!(mergedBAT = BATnew(TYPE_void, TYPE_oid, BATcount(candsBAT)+BATcount(dimBAT), TRANSIENT))) + return NULL; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list